• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2022 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/sksl/codegen/SkSLRasterPipelineCodeGenerator.h"
9 
10 #include "include/core/SkPoint.h"
11 #include "include/core/SkSpan.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "src/base/SkEnumBitMask.h"
15 #include "src/base/SkStringView.h"
16 #include "src/base/SkUtils.h"
17 #include "src/core/SkTHash.h"
18 #include "src/sksl/SkSLAnalysis.h"
19 #include "src/sksl/SkSLBuiltinTypes.h"
20 #include "src/sksl/SkSLCompiler.h"
21 #include "src/sksl/SkSLConstantFolder.h"
22 #include "src/sksl/SkSLContext.h"
23 #include "src/sksl/SkSLDefines.h"
24 #include "src/sksl/SkSLIntrinsicList.h"
25 #include "src/sksl/SkSLOperator.h"
26 #include "src/sksl/SkSLPosition.h"
27 #include "src/sksl/analysis/SkSLProgramUsage.h"
28 #include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
29 #include "src/sksl/ir/SkSLBinaryExpression.h"
30 #include "src/sksl/ir/SkSLBlock.h"
31 #include "src/sksl/ir/SkSLBreakStatement.h"
32 #include "src/sksl/ir/SkSLChildCall.h"
33 #include "src/sksl/ir/SkSLConstructor.h"
34 #include "src/sksl/ir/SkSLConstructorDiagonalMatrix.h"
35 #include "src/sksl/ir/SkSLConstructorMatrixResize.h"
36 #include "src/sksl/ir/SkSLConstructorSplat.h"
37 #include "src/sksl/ir/SkSLContinueStatement.h"
38 #include "src/sksl/ir/SkSLDoStatement.h"
39 #include "src/sksl/ir/SkSLExpression.h"
40 #include "src/sksl/ir/SkSLExpressionStatement.h"
41 #include "src/sksl/ir/SkSLFieldAccess.h"
42 #include "src/sksl/ir/SkSLForStatement.h"
43 #include "src/sksl/ir/SkSLFunctionCall.h"
44 #include "src/sksl/ir/SkSLFunctionDeclaration.h"
45 #include "src/sksl/ir/SkSLFunctionDefinition.h"
46 #include "src/sksl/ir/SkSLIRNode.h"
47 #include "src/sksl/ir/SkSLIfStatement.h"
48 #include "src/sksl/ir/SkSLIndexExpression.h"
49 #include "src/sksl/ir/SkSLLayout.h"
50 #include "src/sksl/ir/SkSLLiteral.h"
51 #include "src/sksl/ir/SkSLModifierFlags.h"
52 #include "src/sksl/ir/SkSLPostfixExpression.h"
53 #include "src/sksl/ir/SkSLPrefixExpression.h"
54 #include "src/sksl/ir/SkSLProgram.h"
55 #include "src/sksl/ir/SkSLProgramElement.h"
56 #include "src/sksl/ir/SkSLReturnStatement.h"
57 #include "src/sksl/ir/SkSLStatement.h"
58 #include "src/sksl/ir/SkSLSwitchCase.h"
59 #include "src/sksl/ir/SkSLSwitchStatement.h"
60 #include "src/sksl/ir/SkSLSwizzle.h"
61 #include "src/sksl/ir/SkSLTernaryExpression.h"
62 #include "src/sksl/ir/SkSLType.h"
63 #include "src/sksl/ir/SkSLVarDeclarations.h"
64 #include "src/sksl/ir/SkSLVariable.h"
65 #include "src/sksl/ir/SkSLVariableReference.h"
66 #include "src/sksl/tracing/SkSLDebugTracePriv.h"
67 #include "src/sksl/transform/SkSLTransform.h"
68 
69 #include <algorithm>
70 #include <climits>
71 #include <cstddef>
72 #include <cstdint>
73 #include <float.h>
74 #include <iterator>
75 #include <optional>
76 #include <string>
77 #include <string_view>
78 #include <utility>
79 #include <vector>
80 
81 using namespace skia_private;
82 
83 namespace SkSL {
84 namespace RP {
85 
/**
 * Always reports failure. This function exists purely as a debugging aid: when
 * MakeRasterPipelineProgram returns false, put a breakpoint in here to learn more.
 */
static bool unsupported() {
    constexpr bool kIsSupported = false;
    return kIsSupported;
}
90 
91 class AutoContinueMask;
92 class Generator;
93 class LValue;
94 
95 class SlotManager {
96 public:
SlotManager(std::vector<SlotDebugInfo> * i)97     SlotManager(std::vector<SlotDebugInfo>* i) : fSlotDebugInfo(i) {}
98 
99     /** Used by `createSlots` to add this variable to SlotDebugInfo inside the DebugTrace. */
100     void addSlotDebugInfoForGroup(const std::string& varName,
101                                   const Type& type,
102                                   Position pos,
103                                   int* groupIndex,
104                                   bool isFunctionReturnValue);
105     void addSlotDebugInfo(const std::string& varName,
106                           const Type& type,
107                           Position pos,
108                           bool isFunctionReturnValue);
109 
110     /** Creates slots associated with an SkSL variable or return value. */
111     SlotRange createSlots(std::string name,
112                           const Type& type,
113                           Position pos,
114                           bool isFunctionReturnValue);
115 
116     /**
117      * Associates previously-created slots with an SkSL variable; this can allow multiple variables
118      * to share overlapping ranges. If the variable was already associated with a slot range,
119      * returns the previously associated range.
120      */
121     std::optional<SlotRange> mapVariableToSlots(const Variable& v, SlotRange range);
122 
123     /**
124      * Deletes the existing mapping between a variable and its slots; a future call to
125      * `getVariableSlots` will see this as a brand new variable and associate new slots.
126      */
127     void unmapVariableSlots(const Variable& v);
128 
129     /** Looks up the slots associated with an SkSL variable; creates the slot if necessary. */
130     SlotRange getVariableSlots(const Variable& v);
131 
132     /**
133      * Looks up the slots associated with an SkSL function's return value; creates the range if
134      * necessary. Note that recursion is never supported, so we don't need to maintain return values
135      * in a stack; we can just statically allocate one slot per function call-site.
136      */
137     SlotRange getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f);
138 
139     /** Returns the total number of slots consumed. */
slotCount() const140     int slotCount() const { return fSlotCount; }
141 
142 private:
143     THashMap<const IRNode*, SlotRange> fSlotMap;
144     int fSlotCount = 0;
145     std::vector<SlotDebugInfo>* fSlotDebugInfo;
146 };
147 
148 class AutoStack {
149 public:
150     /**
151      * Creates a temporary stack. The caller is responsible for discarding every entry on this
152      * stack before ~AutoStack is reached.
153      */
154     explicit AutoStack(Generator* g);
155     ~AutoStack();
156 
157     /** Activates the associated stack. */
158     void enter();
159 
160     /** Undoes a call to `enter`, returning to the previously-active stack. */
161     void exit();
162 
163     /** Returns the stack ID of this AutoStack. */
stackID()164     int stackID() { return fStackID; }
165 
166     /** Clones values from this stack onto the top of the active stack. */
167     void pushClone(int slots);
168 
169     /** Clones values from a fixed range of this stack onto the top of the active stack. */
170     void pushClone(SlotRange range, int offsetFromStackTop);
171 
172     /** Clones values from a dynamic range of this stack onto the top of the active stack. */
173     void pushCloneIndirect(SlotRange range, int dynamicStackID, int offsetFromStackTop);
174 
175 private:
176     Generator* fGenerator;
177     int fStackID = 0;
178     int fParentStackID = 0;
179 };
180 
181 class Generator {
182 public:
Generator(const SkSL::Program & program,DebugTracePriv * debugTrace,bool writeTraceOps)183     Generator(const SkSL::Program& program, DebugTracePriv* debugTrace, bool writeTraceOps)
184             : fProgram(program)
185             , fContext(fProgram.fContext->fTypes, *fProgram.fContext->fErrors)
186             , fDebugTrace(debugTrace)
187             , fWriteTraceOps(writeTraceOps)
188             , fProgramSlots(debugTrace ? &debugTrace->fSlotInfo : nullptr)
189             , fUniformSlots(debugTrace ? &debugTrace->fUniformInfo : nullptr)
190             , fImmutableSlots(nullptr) {
191         fContext.fConfig = fProgram.fConfig.get();
192         fContext.fModule = fProgram.fContext->fModule;
193     }
194 
~Generator()195     ~Generator() {
196         // ~AutoStack calls into the Generator, so we need to make sure the trace mask is reset
197         // before the Generator is destroyed.
198         fTraceMask.reset();
199     }
200 
201     /** Converts the SkSL main() function into a set of Instructions. */
202     bool writeProgram(const FunctionDefinition& function);
203 
204     /** Returns the generated program. */
205     std::unique_ptr<RP::Program> finish();
206 
207     /**
208      * Converts an SkSL function into a set of Instructions. Returns nullopt if the function
209      * contained unsupported statements or expressions.
210      */
211     std::optional<SlotRange> writeFunction(const IRNode& callSite,
212                                            const FunctionDefinition& function,
213                                            SkSpan<std::unique_ptr<Expression> const> arguments);
214 
215     /**
216      * Returns the slot index of this function inside the FunctionDebugInfo array in DebugTracePriv.
217      * The FunctionDebugInfo slot will be created if it doesn't already exist.
218      */
219     int getFunctionDebugInfo(const FunctionDeclaration& decl);
220 
221     /** Returns true for variables with slots in fProgramSlots; immutables or uniforms are false. */
hasVariableSlots(const Variable & v)222     bool hasVariableSlots(const Variable& v) {
223         return !IsUniform(v) && !fImmutableVariables.contains(&v);
224     }
225 
226     /** Looks up the slots associated with an SkSL variable; creates the slots if necessary. */
getVariableSlots(const Variable & v)227     SlotRange getVariableSlots(const Variable& v) {
228         SkASSERT(this->hasVariableSlots(v));
229         return fProgramSlots.getVariableSlots(v);
230     }
231 
232     /**
233      * Looks up the slots associated with an immutable variable; creates the slots if necessary.
234      */
getImmutableSlots(const Variable & v)235     SlotRange getImmutableSlots(const Variable& v) {
236         SkASSERT(!IsUniform(v));
237         SkASSERT(fImmutableVariables.contains(&v));
238         return fImmutableSlots.getVariableSlots(v);
239     }
240 
241     /** Looks up the slots associated with an SkSL uniform; creates the slots if necessary. */
getUniformSlots(const Variable & v)242     SlotRange getUniformSlots(const Variable& v) {
243         SkASSERT(IsUniform(v));
244         SkASSERT(!fImmutableVariables.contains(&v));
245         return fUniformSlots.getVariableSlots(v);
246     }
247 
248     /**
249      * Looks up the slots associated with an SkSL function's return value; creates the range if
250      * necessary. Note that recursion is never supported, so we don't need to maintain return values
251      * in a stack; we can just statically allocate one slot per function call-site.
252      */
getFunctionSlots(const IRNode & callSite,const FunctionDeclaration & f)253     SlotRange getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f) {
254         return fProgramSlots.getFunctionSlots(callSite, f);
255     }
256 
257     /**
258      * Creates an additional stack for the program to push values onto. The stack will not become
259      * actively in-use until `setCurrentStack` is called.
260      */
261     int createStack();
262 
263     /** Frees a stack generated by `createStack`. The freed stack must be completely empty. */
264     void recycleStack(int stackID);
265 
266     /** Redirects builder ops to point to a different stack (created by `createStack`). */
267     void setCurrentStack(int stackID);
268 
269     /** Reports the currently active stack. */
currentStack()270     int currentStack() {
271         return fCurrentStack;
272     }
273 
274     /**
275      * Returns an LValue for the passed-in expression; if the expression isn't supported as an
276      * LValue, returns nullptr.
277      */
278     std::unique_ptr<LValue> makeLValue(const Expression& e, bool allowScratch = false);
279 
280     /** Copies the top-of-stack value into this lvalue, without discarding it from the stack. */
281     [[nodiscard]] bool store(LValue& lvalue);
282 
283     /** Pushes the lvalue onto the top-of-stack. */
284     [[nodiscard]] bool push(LValue& lvalue);
285 
286     /** The Builder stitches our instructions together into Raster Pipeline code. */
builder()287     Builder* builder() { return &fBuilder; }
288 
289     /** Appends a statement to the program. */
290     [[nodiscard]] bool writeStatement(const Statement& s);
291     [[nodiscard]] bool writeBlock(const Block& b);
292     [[nodiscard]] bool writeBreakStatement(const BreakStatement& b);
293     [[nodiscard]] bool writeContinueStatement(const ContinueStatement& b);
294     [[nodiscard]] bool writeDoStatement(const DoStatement& d);
295     [[nodiscard]] bool writeExpressionStatement(const ExpressionStatement& e);
296     [[nodiscard]] bool writeMasklessForStatement(const ForStatement& f);
297     [[nodiscard]] bool writeForStatement(const ForStatement& f);
298     [[nodiscard]] bool writeGlobals();
299     [[nodiscard]] bool writeIfStatement(const IfStatement& i);
300     [[nodiscard]] bool writeDynamicallyUniformIfStatement(const IfStatement& i);
301     [[nodiscard]] bool writeReturnStatement(const ReturnStatement& r);
302     [[nodiscard]] bool writeSwitchStatement(const SwitchStatement& s);
303     [[nodiscard]] bool writeVarDeclaration(const VarDeclaration& v);
304     [[nodiscard]] bool writeImmutableVarDeclaration(const VarDeclaration& d);
305 
306     /** Pushes an expression to the value stack. */
307     [[nodiscard]] bool pushBinaryExpression(const BinaryExpression& e);
308     [[nodiscard]] bool pushBinaryExpression(const Expression& left,
309                                             Operator op,
310                                             const Expression& right);
311     [[nodiscard]] bool pushChildCall(const ChildCall& c);
312     [[nodiscard]] bool pushConstructorCast(const AnyConstructor& c);
313     [[nodiscard]] bool pushConstructorCompound(const AnyConstructor& c);
314     [[nodiscard]] bool pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix& c);
315     [[nodiscard]] bool pushConstructorMatrixResize(const ConstructorMatrixResize& c);
316     [[nodiscard]] bool pushConstructorSplat(const ConstructorSplat& c);
317     [[nodiscard]] bool pushExpression(const Expression& e, bool usesResult = true);
318     [[nodiscard]] bool pushFieldAccess(const FieldAccess& f);
319     [[nodiscard]] bool pushFunctionCall(const FunctionCall& c);
320     [[nodiscard]] bool pushIndexExpression(const IndexExpression& i);
321     [[nodiscard]] bool pushIntrinsic(const FunctionCall& c);
322     [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic, const Expression& arg0);
323     [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic,
324                                      const Expression& arg0,
325                                      const Expression& arg1);
326     [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic,
327                                      const Expression& arg0,
328                                      const Expression& arg1,
329                                      const Expression& arg2);
330     [[nodiscard]] bool pushLiteral(const Literal& l);
331     [[nodiscard]] bool pushPostfixExpression(const PostfixExpression& p, bool usesResult);
332     [[nodiscard]] bool pushPrefixExpression(const PrefixExpression& p);
333     [[nodiscard]] bool pushPrefixExpression(Operator op, const Expression& expr);
334     [[nodiscard]] bool pushSwizzle(const Swizzle& s);
335     [[nodiscard]] bool pushTernaryExpression(const TernaryExpression& t);
336     [[nodiscard]] bool pushTernaryExpression(const Expression& test,
337                                              const Expression& ifTrue,
338                                              const Expression& ifFalse);
339     [[nodiscard]] bool pushDynamicallyUniformTernaryExpression(const Expression& test,
340                                                                const Expression& ifTrue,
341                                                                const Expression& ifFalse);
342     [[nodiscard]] bool pushVariableReference(const VariableReference& v);
343 
344     /** Support methods for immutable data, which trade more slots for smaller code size. */
345     using ImmutableBits = int32_t;
346 
347     [[nodiscard]] bool pushImmutableData(const Expression& e);
348     [[nodiscard]] std::optional<SlotRange> findPreexistingImmutableData(
349             const TArray<ImmutableBits>& immutableValues);
350     [[nodiscard]] std::optional<ImmutableBits> getImmutableBitsForSlot(const Expression& expr,
351                                                                        size_t slot);
352     [[nodiscard]] bool getImmutableValueForExpression(const Expression& expr,
353                                                       TArray<ImmutableBits>* immutableValues);
354     void storeImmutableValueToSlots(const TArray<ImmutableBits>& immutableValues, SlotRange slots);
355 
356     /** Pops an expression from the value stack and copies it into slots. */
popToSlotRange(SlotRange r)357     void popToSlotRange(SlotRange r) {
358         fBuilder.pop_slots(r);
359         if (this->shouldWriteTraceOps()) {
360             fBuilder.trace_var(fTraceMask->stackID(), r);
361         }
362     }
popToSlotRangeUnmasked(SlotRange r)363     void popToSlotRangeUnmasked(SlotRange r) {
364         fBuilder.pop_slots_unmasked(r);
365         if (this->shouldWriteTraceOps()) {
366             fBuilder.trace_var(fTraceMask->stackID(), r);
367         }
368     }
369 
370     /** Pops an expression from the value stack and discards it. */
discardExpression(int slots)371     void discardExpression(int slots) { fBuilder.discard_stack(slots); }
372 
373     /** Zeroes out a range of slots. */
zeroSlotRangeUnmasked(SlotRange r)374     void zeroSlotRangeUnmasked(SlotRange r) {
375         fBuilder.zero_slots_unmasked(r);
376         if (this->shouldWriteTraceOps()) {
377             fBuilder.trace_var(fTraceMask->stackID(), r);
378         }
379     }
380 
381     /**
382      * Emits a trace_line opcode. writeStatement does this, and statements that alter control flow
383      * may need to explicitly add additional traces.
384      */
385     void emitTraceLine(Position pos);
386 
387     /**
388      * Emits a trace_scope opcode, which alters the SkSL variable-scope depth.
389      * Unlike the other trace ops, trace_scope takes a dedicated mask instead of the trace-scope
390      * mask. Call `pushTraceScopeMask` to synthesize this mask; discard it when you're done.
391      */
392     void pushTraceScopeMask();
393     void discardTraceScopeMask();
394     void emitTraceScope(int delta);
395 
396     /** Prepares our position-to-line-offset conversion table (stored in `fLineOffsets`). */
397     void calculateLineOffsets();
398 
shouldWriteTraceOps()399     bool shouldWriteTraceOps() { return fDebugTrace && fWriteTraceOps; }
traceMaskStackID()400     int traceMaskStackID() { return fTraceMask->stackID(); }
401 
402     /** Expression utilities. */
403     struct TypedOps {
404         BuilderOp fFloatOp;
405         BuilderOp fSignedOp;
406         BuilderOp fUnsignedOp;
407         BuilderOp fBooleanOp;
408     };
409 
410     static BuilderOp GetTypedOp(const SkSL::Type& type, const TypedOps& ops);
411 
412     [[nodiscard]] bool unaryOp(const SkSL::Type& type, const TypedOps& ops);
413     [[nodiscard]] bool binaryOp(const SkSL::Type& type, const TypedOps& ops);
414     [[nodiscard]] bool ternaryOp(const SkSL::Type& type, const TypedOps& ops);
415     [[nodiscard]] bool pushIntrinsic(const TypedOps& ops, const Expression& arg0);
416     [[nodiscard]] bool pushIntrinsic(const TypedOps& ops,
417                                      const Expression& arg0,
418                                      const Expression& arg1);
419     [[nodiscard]] bool pushIntrinsic(BuilderOp builderOp, const Expression& arg0);
420     [[nodiscard]] bool pushIntrinsic(BuilderOp builderOp,
421                                      const Expression& arg0,
422                                      const Expression& arg1);
423     [[nodiscard]] bool pushAbsFloatIntrinsic(int slots);
424     [[nodiscard]] bool pushLengthIntrinsic(int slotCount);
425     [[nodiscard]] bool pushVectorizedExpression(const Expression& expr, const Type& vectorType);
426     [[nodiscard]] bool pushVariableReferencePartial(const VariableReference& v, SlotRange subset);
427     [[nodiscard]] bool pushLValueOrExpression(LValue* lvalue, const Expression& expr);
428     [[nodiscard]] bool pushMatrixMultiply(LValue* lvalue,
429                                           const Expression& left,
430                                           const Expression& right,
431                                           int leftColumns, int leftRows,
432                                           int rightColumns, int rightRows);
433     [[nodiscard]] bool pushStructuredComparison(LValue* left,
434                                                 Operator op,
435                                                 LValue* right,
436                                                 const Type& type);
437 
438     void foldWithMultiOp(BuilderOp op, int elements);
439     void foldComparisonOp(Operator op, int elements);
440 
441     BuilderOp getTypedOp(const SkSL::Type& type, const TypedOps& ops) const;
442 
returnComplexity(const FunctionDefinition * func)443     Analysis::ReturnComplexity returnComplexity(const FunctionDefinition* func) {
444         Analysis::ReturnComplexity* complexity = fReturnComplexityMap.find(func);
445         if (!complexity) {
446             complexity = fReturnComplexityMap.set(fCurrentFunction,
447                                                   Analysis::GetReturnComplexity(*func));
448         }
449         return *complexity;
450     }
451 
needsReturnMask(const FunctionDefinition * func)452     bool needsReturnMask(const FunctionDefinition* func) {
453         return this->returnComplexity(func) >= Analysis::ReturnComplexity::kEarlyReturns;
454     }
455 
needsFunctionResultSlots(const FunctionDefinition * func)456     bool needsFunctionResultSlots(const FunctionDefinition* func) {
457         return this->shouldWriteTraceOps() || (this->returnComplexity(func) >
458                                                Analysis::ReturnComplexity::kSingleSafeReturn);
459     }
460 
IsUniform(const Variable & var)461     static bool IsUniform(const Variable& var) {
462        return var.modifierFlags().isUniform();
463     }
464 
IsOutParameter(const Variable & var)465     static bool IsOutParameter(const Variable& var) {
466         return (var.modifierFlags() & (ModifierFlag::kIn | ModifierFlag::kOut)) ==
467                ModifierFlag::kOut;
468     }
469 
IsInoutParameter(const Variable & var)470     static bool IsInoutParameter(const Variable& var) {
471         return (var.modifierFlags() & (ModifierFlag::kIn | ModifierFlag::kOut)) ==
472                (ModifierFlag::kIn | ModifierFlag::kOut);
473     }
474 
475 private:
476     const SkSL::Program& fProgram;
477     SkSL::Context fContext;
478     Builder fBuilder;
479     DebugTracePriv* fDebugTrace = nullptr;
480     bool fWriteTraceOps = false;
481     THashMap<const Variable*, int> fChildEffectMap;
482 
483     SlotManager fProgramSlots;
484     SlotManager fUniformSlots;
485     SlotManager fImmutableSlots;
486 
487     std::optional<AutoStack> fTraceMask;
488     const FunctionDefinition* fCurrentFunction = nullptr;
489     SlotRange fCurrentFunctionResult;
490     AutoContinueMask* fCurrentContinueMask = nullptr;
491     int fCurrentBreakTarget = -1;
492     int fCurrentStack = 0;
493     int fNextStackID = 0;
494     TArray<int> fRecycledStacks;
495 
496     THashMap<const FunctionDefinition*, Analysis::ReturnComplexity> fReturnComplexityMap;
497 
498     THashMap<ImmutableBits, THashSet<Slot>> fImmutableSlotMap;
499     THashSet<const Variable*> fImmutableVariables;
500 
501     // `fInsideCompoundStatement` will be nonzero if we are currently writing statements inside of a
502     // compound-statement Block. (Conceptually those statements should all count as one.)
503     int fInsideCompoundStatement = 0;
504 
505     // `fLineOffsets` contains the position of each newline in the source, plus a zero at the
506     // beginning, and the total source length at the end, as sentinels.
507     TArray<int> fLineOffsets;
508 
509     static constexpr auto kAddOps = TypedOps{BuilderOp::add_n_floats,
510                                              BuilderOp::add_n_ints,
511                                              BuilderOp::add_n_ints,
512                                              BuilderOp::unsupported};
513     static constexpr auto kSubtractOps = TypedOps{BuilderOp::sub_n_floats,
514                                                   BuilderOp::sub_n_ints,
515                                                   BuilderOp::sub_n_ints,
516                                                   BuilderOp::unsupported};
517     static constexpr auto kMultiplyOps = TypedOps{BuilderOp::mul_n_floats,
518                                                   BuilderOp::mul_n_ints,
519                                                   BuilderOp::mul_n_ints,
520                                                   BuilderOp::unsupported};
521     static constexpr auto kDivideOps = TypedOps{BuilderOp::div_n_floats,
522                                                 BuilderOp::div_n_ints,
523                                                 BuilderOp::div_n_uints,
524                                                 BuilderOp::unsupported};
525     static constexpr auto kLessThanOps = TypedOps{BuilderOp::cmplt_n_floats,
526                                                   BuilderOp::cmplt_n_ints,
527                                                   BuilderOp::cmplt_n_uints,
528                                                   BuilderOp::unsupported};
529     static constexpr auto kLessThanEqualOps = TypedOps{BuilderOp::cmple_n_floats,
530                                                        BuilderOp::cmple_n_ints,
531                                                        BuilderOp::cmple_n_uints,
532                                                        BuilderOp::unsupported};
533     static constexpr auto kEqualOps = TypedOps{BuilderOp::cmpeq_n_floats,
534                                                BuilderOp::cmpeq_n_ints,
535                                                BuilderOp::cmpeq_n_ints,
536                                                BuilderOp::cmpeq_n_ints};
537     static constexpr auto kNotEqualOps = TypedOps{BuilderOp::cmpne_n_floats,
538                                                   BuilderOp::cmpne_n_ints,
539                                                   BuilderOp::cmpne_n_ints,
540                                                   BuilderOp::cmpne_n_ints};
541     static constexpr auto kModOps = TypedOps{BuilderOp::mod_n_floats,
542                                              BuilderOp::unsupported,
543                                              BuilderOp::unsupported,
544                                              BuilderOp::unsupported};
545     static constexpr auto kMinOps = TypedOps{BuilderOp::min_n_floats,
546                                              BuilderOp::min_n_ints,
547                                              BuilderOp::min_n_uints,
548                                              BuilderOp::min_n_uints};
549     static constexpr auto kMaxOps = TypedOps{BuilderOp::max_n_floats,
550                                              BuilderOp::max_n_ints,
551                                              BuilderOp::max_n_uints,
552                                              BuilderOp::max_n_uints};
553     static constexpr auto kMixOps = TypedOps{BuilderOp::mix_n_floats,
554                                              BuilderOp::unsupported,
555                                              BuilderOp::unsupported,
556                                              BuilderOp::unsupported};
557     static constexpr auto kInverseSqrtOps = TypedOps{BuilderOp::invsqrt_float,
558                                                      BuilderOp::unsupported,
559                                                      BuilderOp::unsupported,
560                                                      BuilderOp::unsupported};
561     friend class AutoContinueMask;
562 };
563 
AutoStack(Generator * g)564 AutoStack::AutoStack(Generator* g)
565         : fGenerator(g)
566         , fStackID(g->createStack()) {}
567 
~AutoStack()568 AutoStack::~AutoStack() {
569     fGenerator->recycleStack(fStackID);
570 }
571 
enter()572 void AutoStack::enter() {
573     fParentStackID = fGenerator->currentStack();
574     fGenerator->setCurrentStack(fStackID);
575 }
576 
exit()577 void AutoStack::exit() {
578     SkASSERT(fGenerator->currentStack() == fStackID);
579     fGenerator->setCurrentStack(fParentStackID);
580 }
581 
pushClone(int slots)582 void AutoStack::pushClone(int slots) {
583     this->pushClone(SlotRange{0, slots}, /*offsetFromStackTop=*/slots);
584 }
585 
pushClone(SlotRange range,int offsetFromStackTop)586 void AutoStack::pushClone(SlotRange range, int offsetFromStackTop) {
587     fGenerator->builder()->push_clone_from_stack(range, fStackID, offsetFromStackTop);
588 }
589 
pushCloneIndirect(SlotRange range,int dynamicStackID,int offsetFromStackTop)590 void AutoStack::pushCloneIndirect(SlotRange range, int dynamicStackID, int offsetFromStackTop) {
591     fGenerator->builder()->push_clone_indirect_from_stack(
592             range, dynamicStackID, /*otherStackID=*/fStackID, offsetFromStackTop);
593 }
594 
595 class AutoContinueMask {
596 public:
AutoContinueMask(Generator * gen)597     AutoContinueMask(Generator* gen) : fGenerator(gen) {}
598 
~AutoContinueMask()599     ~AutoContinueMask() {
600         if (fPreviousContinueMask) {
601             fGenerator->fCurrentContinueMask = fPreviousContinueMask;
602         }
603     }
604 
enable()605     void enable() {
606         SkASSERT(!fContinueMaskStack.has_value());
607 
608         fContinueMaskStack.emplace(fGenerator);
609         fPreviousContinueMask = fGenerator->fCurrentContinueMask;
610         fGenerator->fCurrentContinueMask = this;
611     }
612 
enter()613     void enter() {
614         SkASSERT(fContinueMaskStack.has_value());
615         fContinueMaskStack->enter();
616     }
617 
exit()618     void exit() {
619         SkASSERT(fContinueMaskStack.has_value());
620         fContinueMaskStack->exit();
621     }
622 
enterLoopBody()623     void enterLoopBody() {
624         if (fContinueMaskStack.has_value()) {
625             fContinueMaskStack->enter();
626             fGenerator->builder()->push_constant_i(0);
627             fContinueMaskStack->exit();
628         }
629     }
630 
exitLoopBody()631     void exitLoopBody() {
632         if (fContinueMaskStack.has_value()) {
633             fContinueMaskStack->enter();
634             fGenerator->builder()->pop_and_reenable_loop_mask();
635             fContinueMaskStack->exit();
636         }
637     }
638 
stackID()639     int stackID() {
640         SkASSERT(fContinueMaskStack.has_value());
641         return fContinueMaskStack->stackID();
642     }
643 
644 private:
645     std::optional<AutoStack> fContinueMaskStack;
646     Generator* fGenerator = nullptr;
647     AutoContinueMask* fPreviousContinueMask = nullptr;
648 };
649 
650 class AutoLoopTarget {
651 public:
AutoLoopTarget(Generator * gen,int * targetPtr)652     AutoLoopTarget(Generator* gen, int* targetPtr) : fGenerator(gen), fLoopTargetPtr(targetPtr) {
653         fLabelID = fGenerator->builder()->nextLabelID();
654         fPreviousLoopTarget = *fLoopTargetPtr;
655         *fLoopTargetPtr = fLabelID;
656     }
657 
~AutoLoopTarget()658     ~AutoLoopTarget() {
659         *fLoopTargetPtr = fPreviousLoopTarget;
660     }
661 
labelID()662     int labelID() {
663         return fLabelID;
664     }
665 
666 private:
667     Generator* fGenerator = nullptr;
668     int* fLoopTargetPtr = nullptr;
669     int fPreviousLoopTarget;
670     int fLabelID;
671 };
672 
673 class LValue {
674 public:
675     virtual ~LValue() = default;
676 
677     /** Returns true if this lvalue is actually writable--temporaries and uniforms are not. */
678     virtual bool isWritable() const = 0;
679 
680     /**
681      * Returns the fixed slot range of the lvalue, after it is winnowed down to the selected
682      * field/index. The range is calculated assuming every dynamic index will evaluate to zero.
683      */
684     virtual SlotRange fixedSlotRange(Generator* gen) = 0;
685 
686     /**
687      * Returns a stack which holds a single integer, representing the dynamic offset of the lvalue.
688      * This value does not incorporate the fixed offset. If null is returned, the lvalue doesn't
689      * have a dynamic offset. `evaluateDynamicIndices` must be called before this is used.
690      */
691     virtual AutoStack* dynamicSlotRange() = 0;
692 
693     /** Returns the swizzle components of the lvalue, or an empty span for non-swizzle LValues. */
swizzle()694     virtual SkSpan<const int8_t> swizzle() { return {}; }
695 
696     /** Pushes values directly onto the stack. */
697     [[nodiscard]] virtual bool push(Generator* gen,
698                                     SlotRange fixedOffset,
699                                     AutoStack* dynamicOffset,
700                                     SkSpan<const int8_t> swizzle) = 0;
701 
702     /** Stores topmost values from the stack directly into the lvalue. */
703     [[nodiscard]] virtual bool store(Generator* gen,
704                                      SlotRange fixedOffset,
705                                      AutoStack* dynamicOffset,
706                                      SkSpan<const int8_t> swizzle) = 0;
707     /**
708      * Some lvalues refer to a temporary expression; these temps can be held in the
709      * scratch-expression field to ensure that they exist for the lifetime of the lvalue.
710      */
711     std::unique_ptr<Expression> fScratchExpression;
712 };
713 
714 class ScratchLValue final : public LValue {
715 public:
ScratchLValue(const Expression & e)716     explicit ScratchLValue(const Expression& e)
717             : fExpression(&e)
718             , fNumSlots(e.type().slotCount()) {}
719 
~ScratchLValue()720     ~ScratchLValue() override {
721         if (fGenerator && fDedicatedStack.has_value()) {
722             // Jettison the scratch expression.
723             fDedicatedStack->enter();
724             fGenerator->discardExpression(fNumSlots);
725             fDedicatedStack->exit();
726         }
727     }
728 
isWritable() const729     bool isWritable() const override {
730         return false;
731     }
732 
fixedSlotRange(Generator * gen)733     SlotRange fixedSlotRange(Generator* gen) override {
734         return SlotRange{0, fNumSlots};
735     }
736 
dynamicSlotRange()737     AutoStack* dynamicSlotRange() override {
738         return nullptr;
739     }
740 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)741     [[nodiscard]] bool push(Generator* gen,
742                             SlotRange fixedOffset,
743                             AutoStack* dynamicOffset,
744                             SkSpan<const int8_t> swizzle) override {
745         if (!fDedicatedStack.has_value()) {
746             // Push the scratch expression onto a dedicated stack.
747             fGenerator = gen;
748             fDedicatedStack.emplace(fGenerator);
749             fDedicatedStack->enter();
750             if (!fGenerator->pushExpression(*fExpression)) {
751                 return unsupported();
752             }
753             fDedicatedStack->exit();
754         }
755 
756         if (dynamicOffset) {
757             fDedicatedStack->pushCloneIndirect(fixedOffset, dynamicOffset->stackID(), fNumSlots);
758         } else {
759             fDedicatedStack->pushClone(fixedOffset, fNumSlots);
760         }
761         if (!swizzle.empty()) {
762             gen->builder()->swizzle(fixedOffset.count, swizzle);
763         }
764         return true;
765     }
766 
store(Generator *,SlotRange,AutoStack *,SkSpan<const int8_t>)767     [[nodiscard]] bool store(Generator*, SlotRange, AutoStack*, SkSpan<const int8_t>) override {
768         SkDEBUGFAIL("scratch lvalues cannot be stored into");
769         return unsupported();
770     }
771 
772 private:
773     Generator* fGenerator = nullptr;
774     const Expression* fExpression = nullptr;
775     std::optional<AutoStack> fDedicatedStack;
776     int fNumSlots = 0;
777 };
778 
779 class VariableLValue final : public LValue {
780 public:
VariableLValue(const Variable * v)781     explicit VariableLValue(const Variable* v) : fVariable(v) {}
782 
isWritable() const783     bool isWritable() const override {
784         return !Generator::IsUniform(*fVariable);
785     }
786 
fixedSlotRange(Generator * gen)787     SlotRange fixedSlotRange(Generator* gen) override {
788         return Generator::IsUniform(*fVariable) ? gen->getUniformSlots(*fVariable)
789                                                 : gen->getVariableSlots(*fVariable);
790     }
791 
dynamicSlotRange()792     AutoStack* dynamicSlotRange() override {
793         return nullptr;
794     }
795 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)796     [[nodiscard]] bool push(Generator* gen,
797                             SlotRange fixedOffset,
798                             AutoStack* dynamicOffset,
799                             SkSpan<const int8_t> swizzle) override {
800         if (Generator::IsUniform(*fVariable)) {
801             if (dynamicOffset) {
802                 gen->builder()->push_uniform_indirect(fixedOffset, dynamicOffset->stackID(),
803                                                       this->fixedSlotRange(gen));
804             } else {
805                 gen->builder()->push_uniform(fixedOffset);
806             }
807         } else {
808             if (dynamicOffset) {
809                 gen->builder()->push_slots_indirect(fixedOffset, dynamicOffset->stackID(),
810                                                     this->fixedSlotRange(gen));
811             } else {
812                 gen->builder()->push_slots(fixedOffset);
813             }
814         }
815         if (!swizzle.empty()) {
816             gen->builder()->swizzle(fixedOffset.count, swizzle);
817         }
818         return true;
819     }
820 
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)821     [[nodiscard]] bool store(Generator* gen,
822                              SlotRange fixedOffset,
823                              AutoStack* dynamicOffset,
824                              SkSpan<const int8_t> swizzle) override {
825         SkASSERT(!Generator::IsUniform(*fVariable));
826 
827         if (swizzle.empty()) {
828             if (dynamicOffset) {
829                 gen->builder()->copy_stack_to_slots_indirect(fixedOffset, dynamicOffset->stackID(),
830                                                              this->fixedSlotRange(gen));
831             } else {
832                 gen->builder()->copy_stack_to_slots(fixedOffset);
833             }
834         } else {
835             if (dynamicOffset) {
836                 gen->builder()->swizzle_copy_stack_to_slots_indirect(fixedOffset,
837                                                                      dynamicOffset->stackID(),
838                                                                      this->fixedSlotRange(gen),
839                                                                      swizzle,
840                                                                      swizzle.size());
841             } else {
842                 gen->builder()->swizzle_copy_stack_to_slots(fixedOffset, swizzle, swizzle.size());
843             }
844         }
845         if (gen->shouldWriteTraceOps()) {
846             if (dynamicOffset) {
847                 gen->builder()->trace_var_indirect(gen->traceMaskStackID(),
848                                                    fixedOffset,
849                                                    dynamicOffset->stackID(),
850                                                    this->fixedSlotRange(gen));
851             } else {
852                 gen->builder()->trace_var(gen->traceMaskStackID(), fixedOffset);
853             }
854         }
855         return true;
856     }
857 
858 private:
859     const Variable* fVariable;
860 };
861 
862 class ImmutableLValue final : public LValue {
863 public:
ImmutableLValue(const Variable * v)864     explicit ImmutableLValue(const Variable* v) : fVariable(v) {}
865 
isWritable() const866     bool isWritable() const override {
867         return false;
868     }
869 
fixedSlotRange(Generator * gen)870     SlotRange fixedSlotRange(Generator* gen) override {
871         return gen->getImmutableSlots(*fVariable);
872     }
873 
dynamicSlotRange()874     AutoStack* dynamicSlotRange() override {
875         return nullptr;
876     }
877 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)878     [[nodiscard]] bool push(Generator* gen,
879                             SlotRange fixedOffset,
880                             AutoStack* dynamicOffset,
881                             SkSpan<const int8_t> swizzle) override {
882         if (dynamicOffset) {
883             gen->builder()->push_immutable_indirect(fixedOffset, dynamicOffset->stackID(),
884                                                     this->fixedSlotRange(gen));
885         } else {
886             gen->builder()->push_immutable(fixedOffset);
887         }
888         if (!swizzle.empty()) {
889             gen->builder()->swizzle(fixedOffset.count, swizzle);
890         }
891         return true;
892     }
893 
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)894     [[nodiscard]] bool store(Generator* gen,
895                              SlotRange fixedOffset,
896                              AutoStack* dynamicOffset,
897                              SkSpan<const int8_t> swizzle) override {
898         SkDEBUGFAIL("immutable values cannot be stored into");
899         return unsupported();
900     }
901 
902 private:
903     const Variable* fVariable;
904 };
905 
906 class SwizzleLValue final : public LValue {
907 public:
SwizzleLValue(std::unique_ptr<LValue> p,const ComponentArray & c)908     explicit SwizzleLValue(std::unique_ptr<LValue> p, const ComponentArray& c)
909             : fParent(std::move(p))
910             , fComponents(c) {
911         SkASSERT(!fComponents.empty() && fComponents.size() <= 4);
912     }
913 
isWritable() const914     bool isWritable() const override {
915         return fParent->isWritable();
916     }
917 
fixedSlotRange(Generator * gen)918     SlotRange fixedSlotRange(Generator* gen) override {
919         return fParent->fixedSlotRange(gen);
920     }
921 
dynamicSlotRange()922     AutoStack* dynamicSlotRange() override {
923         return fParent->dynamicSlotRange();
924     }
925 
swizzle()926     SkSpan<const int8_t> swizzle() override {
927         return fComponents;
928     }
929 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)930     [[nodiscard]] bool push(Generator* gen,
931                             SlotRange fixedOffset,
932                             AutoStack* dynamicOffset,
933                             SkSpan<const int8_t> swizzle) override {
934         if (!swizzle.empty()) {
935             SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
936             return unsupported();
937         }
938         return fParent->push(gen, fixedOffset, dynamicOffset, fComponents);
939     }
940 
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)941     [[nodiscard]] bool store(Generator* gen,
942                              SlotRange fixedOffset,
943                              AutoStack* dynamicOffset,
944                              SkSpan<const int8_t> swizzle) override {
945         if (!swizzle.empty()) {
946             SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
947             return unsupported();
948         }
949         return fParent->store(gen, fixedOffset, dynamicOffset, fComponents);
950     }
951 
952 private:
953     std::unique_ptr<LValue> fParent;
954     const ComponentArray& fComponents;
955 };
956 
957 class UnownedLValueSlice : public LValue {
958 public:
UnownedLValueSlice(LValue * p,int initialSlot,int numSlots)959     explicit UnownedLValueSlice(LValue* p, int initialSlot, int numSlots)
960             : fParent(p)
961             , fInitialSlot(initialSlot)
962             , fNumSlots(numSlots) {
963         SkASSERT(fInitialSlot >= 0);
964         SkASSERT(fNumSlots > 0);
965     }
966 
isWritable() const967     bool isWritable() const override {
968         return fParent->isWritable();
969     }
970 
fixedSlotRange(Generator * gen)971     SlotRange fixedSlotRange(Generator* gen) override {
972         SlotRange range = fParent->fixedSlotRange(gen);
973         SlotRange adjusted = range;
974         adjusted.index += fInitialSlot;
975         adjusted.count = fNumSlots;
976         SkASSERT((adjusted.index + adjusted.count) <= (range.index + range.count));
977         return adjusted;
978     }
979 
dynamicSlotRange()980     AutoStack* dynamicSlotRange() override {
981         return fParent->dynamicSlotRange();
982     }
983 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)984     [[nodiscard]] bool push(Generator* gen,
985                             SlotRange fixedOffset,
986                             AutoStack* dynamicOffset,
987                             SkSpan<const int8_t> swizzle) override {
988         return fParent->push(gen, fixedOffset, dynamicOffset, swizzle);
989     }
990 
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)991     [[nodiscard]] bool store(Generator* gen,
992                              SlotRange fixedOffset,
993                              AutoStack* dynamicOffset,
994                              SkSpan<const int8_t> swizzle) override {
995         return fParent->store(gen, fixedOffset, dynamicOffset, swizzle);
996     }
997 
998 protected:
999     LValue* fParent;
1000 
1001 private:
1002     int fInitialSlot = 0;
1003     int fNumSlots = 0;
1004 };
1005 
1006 class LValueSlice final : public UnownedLValueSlice {
1007 public:
LValueSlice(std::unique_ptr<LValue> p,int initialSlot,int numSlots)1008     explicit LValueSlice(std::unique_ptr<LValue> p, int initialSlot, int numSlots)
1009             : UnownedLValueSlice(p.release(), initialSlot, numSlots) {}
1010 
~LValueSlice()1011     ~LValueSlice() override {
1012         delete fParent;
1013     }
1014 };
1015 
1016 class DynamicIndexLValue final : public LValue {
1017 public:
DynamicIndexLValue(std::unique_ptr<LValue> p,const IndexExpression & i)1018     explicit DynamicIndexLValue(std::unique_ptr<LValue> p, const IndexExpression& i)
1019             : fParent(std::move(p))
1020             , fIndexExpr(&i) {
1021         SkASSERT(fIndexExpr->index()->type().isInteger());
1022     }
1023 
~DynamicIndexLValue()1024     ~DynamicIndexLValue() override {
1025         if (fDedicatedStack.has_value()) {
1026             SkASSERT(fGenerator);
1027 
1028             // Jettison the index expression.
1029             fDedicatedStack->enter();
1030             fGenerator->discardExpression(/*slots=*/1);
1031             fDedicatedStack->exit();
1032         }
1033     }
1034 
isWritable() const1035     bool isWritable() const override {
1036         return fParent->isWritable();
1037     }
1038 
evaluateDynamicIndices(Generator * gen)1039     [[nodiscard]] bool evaluateDynamicIndices(Generator* gen) {
1040         // The index must only be computed once; the index-expression could have side effects.
1041         // Once it has been computed, the offset lives on `fDedicatedStack`.
1042         SkASSERT(!fDedicatedStack.has_value());
1043         SkASSERT(!fGenerator);
1044         fGenerator = gen;
1045         fDedicatedStack.emplace(fGenerator);
1046 
1047         if (!fParent->swizzle().empty()) {
1048             SkDEBUGFAIL("an indexed-swizzle should have been handled by RewriteIndexedSwizzle");
1049             return unsupported();
1050         }
1051 
1052         // Push the index expression onto the dedicated stack.
1053         fDedicatedStack->enter();
1054         if (!fGenerator->pushExpression(*fIndexExpr->index())) {
1055             return unsupported();
1056         }
1057 
1058         // Multiply the index-expression result by the per-value slot count.
1059         int slotCount = fIndexExpr->type().slotCount();
1060         if (slotCount != 1) {
1061             fGenerator->builder()->push_constant_i(fIndexExpr->type().slotCount());
1062             fGenerator->builder()->binary_op(BuilderOp::mul_n_ints, 1);
1063         }
1064 
1065         // Check to see if a parent LValue already has a dynamic index. If so, we need to
1066         // incorporate its value into our own.
1067         if (AutoStack* parentDynamicIndexStack = fParent->dynamicSlotRange()) {
1068             parentDynamicIndexStack->pushClone(/*slots=*/1);
1069             fGenerator->builder()->binary_op(BuilderOp::add_n_ints, 1);
1070         }
1071         fDedicatedStack->exit();
1072         return true;
1073     }
1074 
fixedSlotRange(Generator * gen)1075     SlotRange fixedSlotRange(Generator* gen) override {
1076         // Compute the fixed slot range as if we are indexing into position zero.
1077         SlotRange range = fParent->fixedSlotRange(gen);
1078         range.count = fIndexExpr->type().slotCount();
1079         return range;
1080     }
1081 
dynamicSlotRange()1082     AutoStack* dynamicSlotRange() override {
1083         // We incorporated any parent dynamic offsets when `evaluateDynamicIndices` was called.
1084         SkASSERT(fDedicatedStack.has_value());
1085         return &*fDedicatedStack;
1086     }
1087 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)1088     [[nodiscard]] bool push(Generator* gen,
1089                             SlotRange fixedOffset,
1090                             AutoStack* dynamicOffset,
1091                             SkSpan<const int8_t> swizzle) override {
1092         return fParent->push(gen, fixedOffset, dynamicOffset, swizzle);
1093     }
1094 
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)1095     [[nodiscard]] bool store(Generator* gen,
1096                              SlotRange fixedOffset,
1097                              AutoStack* dynamicOffset,
1098                              SkSpan<const int8_t> swizzle) override {
1099         return fParent->store(gen, fixedOffset, dynamicOffset, swizzle);
1100     }
1101 
1102 private:
1103     Generator* fGenerator = nullptr;
1104     std::unique_ptr<LValue> fParent;
1105     std::optional<AutoStack> fDedicatedStack;
1106     const IndexExpression* fIndexExpr = nullptr;
1107 };
1108 
addSlotDebugInfoForGroup(const std::string & varName,const Type & type,Position pos,int * groupIndex,bool isFunctionReturnValue)1109 void SlotManager::addSlotDebugInfoForGroup(const std::string& varName,
1110                                            const Type& type,
1111                                            Position pos,
1112                                            int* groupIndex,
1113                                            bool isFunctionReturnValue) {
1114     SkASSERT(fSlotDebugInfo);
1115     switch (type.typeKind()) {
1116         case Type::TypeKind::kArray: {
1117             int nslots = type.columns();
1118             const Type& elemType = type.componentType();
1119             for (int slot = 0; slot < nslots; ++slot) {
1120                 this->addSlotDebugInfoForGroup(varName + "[" + std::to_string(slot) + "]", elemType,
1121                                                pos, groupIndex, isFunctionReturnValue);
1122             }
1123             break;
1124         }
1125         case Type::TypeKind::kStruct: {
1126             for (const Field& field : type.fields()) {
1127                 this->addSlotDebugInfoForGroup(varName + "." + std::string(field.fName),
1128                                                *field.fType, pos, groupIndex,
1129                                                isFunctionReturnValue);
1130             }
1131             break;
1132         }
1133         default:
1134             SkASSERTF(0, "unsupported slot type %d", (int)type.typeKind());
1135             [[fallthrough]];
1136 
1137         case Type::TypeKind::kScalar:
1138         case Type::TypeKind::kVector:
1139         case Type::TypeKind::kMatrix: {
1140             Type::NumberKind numberKind = type.componentType().numberKind();
1141             int nslots = type.slotCount();
1142 
1143             for (int slot = 0; slot < nslots; ++slot) {
1144                 SlotDebugInfo slotInfo;
1145                 slotInfo.name = varName;
1146                 slotInfo.columns = type.columns();
1147                 slotInfo.rows = type.rows();
1148                 slotInfo.componentIndex = slot;
1149                 slotInfo.groupIndex = (*groupIndex)++;
1150                 slotInfo.numberKind = numberKind;
1151                 slotInfo.pos = pos;
1152                 slotInfo.fnReturnValue = isFunctionReturnValue ? 1 : -1;
1153                 fSlotDebugInfo->push_back(std::move(slotInfo));
1154             }
1155             break;
1156         }
1157     }
1158 }
1159 
addSlotDebugInfo(const std::string & varName,const Type & type,Position pos,bool isFunctionReturnValue)1160 void SlotManager::addSlotDebugInfo(const std::string& varName,
1161                                    const Type& type,
1162                                    Position pos,
1163                                    bool isFunctionReturnValue) {
1164     int groupIndex = 0;
1165     this->addSlotDebugInfoForGroup(varName, type, pos, &groupIndex, isFunctionReturnValue);
1166     SkASSERT((size_t)groupIndex == type.slotCount());
1167 }
1168 
createSlots(std::string name,const Type & type,Position pos,bool isFunctionReturnValue)1169 SlotRange SlotManager::createSlots(std::string name,
1170                                    const Type& type,
1171                                    Position pos,
1172                                    bool isFunctionReturnValue) {
1173     size_t nslots = type.slotCount();
1174     if (nslots == 0) {
1175         return {};
1176     }
1177     if (fSlotDebugInfo) {
1178         // Our debug slot-info table should have the same length as the actual slot table.
1179         SkASSERT(fSlotDebugInfo->size() == (size_t)fSlotCount);
1180 
1181         // Append slot names and types to our debug slot-info table.
1182         fSlotDebugInfo->reserve(fSlotCount + nslots);
1183         this->addSlotDebugInfo(name, type, pos, isFunctionReturnValue);
1184 
1185         // Confirm that we added the expected number of slots.
1186         SkASSERT(fSlotDebugInfo->size() == (size_t)(fSlotCount + nslots));
1187     }
1188 
1189     SlotRange result = {fSlotCount, (int)nslots};
1190     fSlotCount += nslots;
1191     return result;
1192 }
1193 
mapVariableToSlots(const Variable & v,SlotRange range)1194 std::optional<SlotRange> SlotManager::mapVariableToSlots(const Variable& v, SlotRange range) {
1195     SkASSERT(v.type().slotCount() == SkToSizeT(range.count));
1196     const SlotRange* existingEntry = fSlotMap.find(&v);
1197     std::optional<SlotRange> originalRange = existingEntry ? std::optional(*existingEntry)
1198                                                            : std::nullopt;
1199     fSlotMap.set(&v, range);
1200     return originalRange;
1201 }
1202 
unmapVariableSlots(const Variable & v)1203 void SlotManager::unmapVariableSlots(const Variable& v) {
1204     fSlotMap.remove(&v);
1205 }
1206 
getVariableSlots(const Variable & v)1207 SlotRange SlotManager::getVariableSlots(const Variable& v) {
1208     SlotRange* entry = fSlotMap.find(&v);
1209     if (entry != nullptr) {
1210         return *entry;
1211     }
1212     SlotRange range = this->createSlots(std::string(v.name()),
1213                                         v.type(),
1214                                         v.fPosition,
1215                                         /*isFunctionReturnValue=*/false);
1216     this->mapVariableToSlots(v, range);
1217     return range;
1218 }
1219 
getFunctionSlots(const IRNode & callSite,const FunctionDeclaration & f)1220 SlotRange SlotManager::getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f) {
1221     SlotRange* entry = fSlotMap.find(&callSite);
1222     if (entry != nullptr) {
1223         return *entry;
1224     }
1225     SlotRange range = this->createSlots("[" + std::string(f.name()) + "].result",
1226                                         f.returnType(),
1227                                         f.fPosition,
1228                                         /*isFunctionReturnValue=*/true);
1229     fSlotMap.set(&callSite, range);
1230     return range;
1231 }
1232 
is_sliceable_swizzle(SkSpan<const int8_t> components)1233 static bool is_sliceable_swizzle(SkSpan<const int8_t> components) {
1234     // Determine if the swizzle rearranges its elements, or if it's a simple subset of its elements.
1235     // (A simple subset would be a sequential non-repeating range of components, like `.xyz` or
1236     // `.yzw` or `.z`, but not `.xx` or `.xz`, which can be accessed as a slice of the variable.)
1237     for (size_t index = 1; index < components.size(); ++index) {
1238         if (components[index] != int8_t(components[0] + index)) {
1239             return false;
1240         }
1241     }
1242     return true;
1243 }
1244 
makeLValue(const Expression & e,bool allowScratch)1245 std::unique_ptr<LValue> Generator::makeLValue(const Expression& e, bool allowScratch) {
1246     if (e.is<VariableReference>()) {
1247         const Variable* variable = e.as<VariableReference>().variable();
1248         if (fImmutableVariables.contains(variable)) {
1249             return std::make_unique<ImmutableLValue>(variable);
1250         }
1251         return std::make_unique<VariableLValue>(variable);
1252     }
1253     if (e.is<Swizzle>()) {
1254         const Swizzle& swizzleExpr = e.as<Swizzle>();
1255         if (std::unique_ptr<LValue> base = this->makeLValue(*swizzleExpr.base(),
1256                                                             allowScratch)) {
1257             const ComponentArray& components = swizzleExpr.components();
1258             if (is_sliceable_swizzle(components)) {
1259                 // If the swizzle is a contiguous subset, we can represent it with a fixed slice.
1260                 return std::make_unique<LValueSlice>(std::move(base), components[0],
1261                                                      components.size());
1262             }
1263             return std::make_unique<SwizzleLValue>(std::move(base), components);
1264         }
1265         return nullptr;
1266     }
1267     if (e.is<FieldAccess>()) {
1268         const FieldAccess& fieldExpr = e.as<FieldAccess>();
1269         if (std::unique_ptr<LValue> base = this->makeLValue(*fieldExpr.base(),
1270                                                             allowScratch)) {
1271             // Represent field access with a slice.
1272             return std::make_unique<LValueSlice>(std::move(base), fieldExpr.initialSlot(),
1273                                                  fieldExpr.type().slotCount());
1274         }
1275         return nullptr;
1276     }
1277     if (e.is<IndexExpression>()) {
1278         const IndexExpression& indexExpr = e.as<IndexExpression>();
1279 
1280         // If the index base is swizzled (`vec.zyx[idx]`), rewrite it into an equivalent
1281         // non-swizzled form (`vec[uint3(2,1,0)[idx]]`).
1282         if (std::unique_ptr<Expression> rewritten = Transform::RewriteIndexedSwizzle(fContext,
1283                                                                                      indexExpr)) {
1284             // Convert the rewritten expression into an lvalue.
1285             std::unique_ptr<LValue> lvalue = this->makeLValue(*rewritten, allowScratch);
1286             if (!lvalue) {
1287                 return nullptr;
1288             }
1289             // We need to hold onto the rewritten expression for the lifetime of the lvalue.
1290             lvalue->fScratchExpression = std::move(rewritten);
1291             return lvalue;
1292         }
1293         if (std::unique_ptr<LValue> base = this->makeLValue(*indexExpr.base(),
1294                                                             allowScratch)) {
1295             // If the index is a compile-time constant, we can represent it with a fixed slice.
1296             SKSL_INT indexValue;
1297             if (ConstantFolder::GetConstantInt(*indexExpr.index(), &indexValue)) {
1298                 int numSlots = indexExpr.type().slotCount();
1299                 return std::make_unique<LValueSlice>(std::move(base), numSlots * indexValue,
1300                                                      numSlots);
1301             }
1302 
1303             // Represent non-constant indexing via a dynamic index.
1304             auto dynLValue = std::make_unique<DynamicIndexLValue>(std::move(base), indexExpr);
1305             return dynLValue->evaluateDynamicIndices(this) ? std::move(dynLValue)
1306                                                            : nullptr;
1307         }
1308         return nullptr;
1309     }
1310     if (allowScratch) {
1311         // This path allows us to perform field- and index-accesses on an expression as if it were
1312         // an lvalue, but is a temporary and shouldn't be written back to.
1313         return std::make_unique<ScratchLValue>(e);
1314     }
1315     return nullptr;
1316 }
1317 
push(LValue & lvalue)1318 bool Generator::push(LValue& lvalue) {
1319     return lvalue.push(this,
1320                        lvalue.fixedSlotRange(this),
1321                        lvalue.dynamicSlotRange(),
1322                        /*swizzle=*/{});
1323 }
1324 
store(LValue & lvalue)1325 bool Generator::store(LValue& lvalue) {
1326     SkASSERT(lvalue.isWritable());
1327     return lvalue.store(this,
1328                         lvalue.fixedSlotRange(this),
1329                         lvalue.dynamicSlotRange(),
1330                         /*swizzle=*/{});
1331 }
1332 
getFunctionDebugInfo(const FunctionDeclaration & decl)1333 int Generator::getFunctionDebugInfo(const FunctionDeclaration& decl) {
1334     SkASSERT(fDebugTrace);
1335 
1336     std::string name = decl.description();
1337 
1338     // When generating the debug trace, we typically mark every function as `noinline`. This makes
1339     // the trace more confusing, since this isn't in the source program, so remove it.
1340     static constexpr std::string_view kNoInline = "noinline ";
1341     if (skstd::starts_with(name, kNoInline)) {
1342         name = name.substr(kNoInline.size());
1343     }
1344 
1345     // Look for a matching FunctionDebugInfo slot.
1346     for (size_t index = 0; index < fDebugTrace->fFuncInfo.size(); ++index) {
1347         if (fDebugTrace->fFuncInfo[index].name == name) {
1348             return index;
1349         }
1350     }
1351 
1352     // We've never called this function before; create a new slot to hold its information.
1353     int slot = (int)fDebugTrace->fFuncInfo.size();
1354     fDebugTrace->fFuncInfo.push_back(FunctionDebugInfo{std::move(name)});
1355     return slot;
1356 }
1357 
createStack()1358 int Generator::createStack() {
1359     if (!fRecycledStacks.empty()) {
1360         int stackID = fRecycledStacks.back();
1361         fRecycledStacks.pop_back();
1362         return stackID;
1363     }
1364     return ++fNextStackID;
1365 }
1366 
recycleStack(int stackID)1367 void Generator::recycleStack(int stackID) {
1368     fRecycledStacks.push_back(stackID);
1369 }
1370 
setCurrentStack(int stackID)1371 void Generator::setCurrentStack(int stackID) {
1372     if (fCurrentStack != stackID) {
1373         fCurrentStack = stackID;
1374         fBuilder.set_current_stack(stackID);
1375     }
1376 }
1377 
writeFunction(const IRNode & callSite,const FunctionDefinition & function,SkSpan<std::unique_ptr<Expression> const> arguments)1378 std::optional<SlotRange> Generator::writeFunction(
1379         const IRNode& callSite,
1380         const FunctionDefinition& function,
1381         SkSpan<std::unique_ptr<Expression> const> arguments) {
1382     // Generate debug information and emit a trace-enter op.
1383     int funcIndex = -1;
1384     if (fDebugTrace) {
1385         funcIndex = this->getFunctionDebugInfo(function.declaration());
1386         SkASSERT(funcIndex >= 0);
1387         if (this->shouldWriteTraceOps()) {
1388             fBuilder.trace_enter(fTraceMask->stackID(), funcIndex);
1389         }
1390     }
1391 
1392     // Handle parameter lvalues.
1393     struct RemappedSlotRange {
1394         const Variable* fVariable;
1395         std::optional<SlotRange> fSlotRange;
1396     };
1397     SkSpan<Variable* const> parameters = function.declaration().parameters();
1398     TArray<std::unique_ptr<LValue>> lvalues;
1399     TArray<RemappedSlotRange> remappedSlotRanges;
1400 
1401     if (function.declaration().isMain()) {
1402         // For main(), the parameter slots have already been populated by `writeProgram`, but we
1403         // still need to explicitly emit trace ops for the variables in main(), since they are
1404         // initialized before it is safe to use trace-var. (We can't invoke init-lane-masks until
1405         // after we've copied the inputs from main into slots, because dst.rgba is used to pass in a
1406         // blend-destination color, but we clobber it and put in the execution mask instead.)
1407         if (this->shouldWriteTraceOps()) {
1408             for (const Variable* var : parameters) {
1409                 fBuilder.trace_var(fTraceMask->stackID(), this->getVariableSlots(*var));
1410             }
1411         }
1412     } else {
1413         // Write all the arguments into their parameter's variable slots. Because we never allow
1414         // recursion, we don't need to worry about overwriting any existing values in those slots.
1415         // (In fact, we don't even need to apply the write mask.)
1416         lvalues.resize(arguments.size());
1417 
1418         for (size_t index = 0; index < arguments.size(); ++index) {
1419             const Expression& arg = *arguments[index];
1420             const Variable& param = *parameters[index];
1421 
1422             // Use LValues for out-parameters and inout-parameters, so we can store back to them
1423             // later.
1424             if (IsInoutParameter(param) || IsOutParameter(param)) {
1425                 lvalues[index] = this->makeLValue(arg);
1426                 if (!lvalues[index]) {
1427                     return std::nullopt;
1428                 }
1429                 // There are no guarantees on the starting value of an out-parameter, so we only
1430                 // need to store the lvalues associated with an inout parameter.
1431                 if (IsInoutParameter(param)) {
1432                     if (!this->push(*lvalues[index])) {
1433                         return std::nullopt;
1434                     }
1435                     this->popToSlotRangeUnmasked(this->getVariableSlots(param));
1436                 }
1437                 continue;
1438             }
1439 
1440             // If a parameter is never read by the function, we don't need to populate its slots.
1441             ProgramUsage::VariableCounts paramCounts = fProgram.fUsage->get(param);
1442             if (paramCounts.fRead == 0) {
1443                 // Honor the expression's side effects, if any.
1444                 if (Analysis::HasSideEffects(arg)) {
1445                     if (!this->pushExpression(arg, /*usesResult=*/false)) {
1446                         return std::nullopt;
1447                     }
1448                     this->discardExpression(arg.type().slotCount());
1449                 }
1450                 continue;
1451             }
1452 
1453             // If the expression is a plain variable and the parameter is never written to, we don't
1454             // need to copy it; we can just share the slots from the existing variable.
1455             if (paramCounts.fWrite == 0 && arg.is<VariableReference>()) {
1456                 const Variable& var = *arg.as<VariableReference>().variable();
1457                 if (this->hasVariableSlots(var)) {
1458                     std::optional<SlotRange> originalRange =
1459                             fProgramSlots.mapVariableToSlots(param, this->getVariableSlots(var));
1460                     remappedSlotRanges.push_back({&param, originalRange});
1461                     continue;
1462                 }
1463             }
1464 
1465             // Copy input arguments into their respective parameter slots.
1466             if (!this->pushExpression(arg)) {
1467                 return std::nullopt;
1468             }
1469             this->popToSlotRangeUnmasked(this->getVariableSlots(param));
1470         }
1471     }
1472 
1473     // Set up a slot range dedicated to this function's return value.
1474     SlotRange lastFunctionResult = fCurrentFunctionResult;
1475     fCurrentFunctionResult = this->getFunctionSlots(callSite, function.declaration());
1476 
1477     // Save off the return mask.
1478     if (this->needsReturnMask(&function)) {
1479         fBuilder.enableExecutionMaskWrites();
1480         if (!function.declaration().isMain()) {
1481             fBuilder.push_return_mask();
1482         }
1483     }
1484 
1485     // Emit the function body.
1486     if (!this->writeStatement(*function.body())) {
1487         return std::nullopt;
1488     }
1489 
1490     // Restore the original return mask.
1491     if (this->needsReturnMask(&function)) {
1492         if (!function.declaration().isMain()) {
1493             fBuilder.pop_return_mask();
1494         }
1495         fBuilder.disableExecutionMaskWrites();
1496     }
1497 
1498     // Restore the function-result slot range.
1499     SlotRange functionResult = fCurrentFunctionResult;
1500     fCurrentFunctionResult = lastFunctionResult;
1501 
1502     // Emit a trace-exit op.
1503     if (fDebugTrace && fWriteTraceOps) {
1504         fBuilder.trace_exit(fTraceMask->stackID(), funcIndex);
1505     }
1506 
1507     // Copy out-parameters and inout-parameters back to their homes.
1508     for (int index = 0; index < lvalues.size(); ++index) {
1509         if (lvalues[index]) {
1510             // Only out- and inout-parameters should have an associated lvalue.
1511             const Variable& param = *parameters[index];
1512             SkASSERT(IsInoutParameter(param) || IsOutParameter(param));
1513 
1514             // Copy the parameter's slots directly into the lvalue.
1515             fBuilder.push_slots(this->getVariableSlots(param));
1516             if (!this->store(*lvalues[index])) {
1517                 return std::nullopt;
1518             }
1519             this->discardExpression(param.type().slotCount());
1520         }
1521     }
1522 
1523     // Restore any remapped parameter slot ranges to their original values.
1524     for (const RemappedSlotRange& remapped : remappedSlotRanges) {
1525         if (remapped.fSlotRange.has_value()) {
1526             fProgramSlots.mapVariableToSlots(*remapped.fVariable, *remapped.fSlotRange);
1527         } else {
1528             fProgramSlots.unmapVariableSlots(*remapped.fVariable);
1529         }
1530     }
1531 
1532     return functionResult;
1533 }
1534 
emitTraceLine(Position pos)1535 void Generator::emitTraceLine(Position pos) {
1536     if (fDebugTrace && fWriteTraceOps && pos.valid() && fInsideCompoundStatement == 0) {
1537         // Binary search within fLineOffets to convert the position into a line number.
1538         SkASSERT(fLineOffsets.size() >= 2);
1539         SkASSERT(fLineOffsets[0] == 0);
1540         SkASSERT(fLineOffsets.back() == (int)fProgram.fSource->length());
1541         int lineNumber = std::distance(
1542                 fLineOffsets.begin(),
1543                 std::upper_bound(fLineOffsets.begin(), fLineOffsets.end(), pos.startOffset()));
1544 
1545         fBuilder.trace_line(fTraceMask->stackID(), lineNumber);
1546     }
1547 }
1548 
pushTraceScopeMask()1549 void Generator::pushTraceScopeMask() {
1550     if (this->shouldWriteTraceOps()) {
1551         // Take the intersection of the trace mask and the execution mask. To do this, start with an
1552         // all-zero mask, then use select to overwrite those zeros with the trace mask across all
1553         // executing lanes. We'll get the trace mask in executing lanes, and zero in dead lanes.
1554         fBuilder.push_constant_i(0);
1555         fTraceMask->pushClone(/*slots=*/1);
1556         fBuilder.select(/*slots=*/1);
1557     }
1558 }
1559 
discardTraceScopeMask()1560 void Generator::discardTraceScopeMask() {
1561     if (this->shouldWriteTraceOps()) {
1562         this->discardExpression(/*slots=*/1);
1563     }
1564 }
1565 
emitTraceScope(int delta)1566 void Generator::emitTraceScope(int delta) {
1567     if (this->shouldWriteTraceOps()) {
1568         fBuilder.trace_scope(this->currentStack(), delta);
1569     }
1570 }
1571 
calculateLineOffsets()1572 void Generator::calculateLineOffsets() {
1573     SkASSERT(fLineOffsets.empty());
1574     fLineOffsets.push_back(0);
1575     for (size_t i = 0; i < fProgram.fSource->length(); ++i) {
1576         if ((*fProgram.fSource)[i] == '\n') {
1577             fLineOffsets.push_back(i);
1578         }
1579     }
1580     fLineOffsets.push_back(fProgram.fSource->length());
1581 }
1582 
writeGlobals()1583 bool Generator::writeGlobals() {
1584     for (const ProgramElement* e : fProgram.elements()) {
1585         if (e->is<GlobalVarDeclaration>()) {
1586             const GlobalVarDeclaration& gvd = e->as<GlobalVarDeclaration>();
1587             const VarDeclaration& decl = gvd.varDeclaration();
1588             const Variable* var = decl.var();
1589 
1590             if (var->type().isEffectChild()) {
1591                 // Associate each child effect variable with its numeric index.
1592                 SkASSERT(!fChildEffectMap.find(var));
1593                 int childEffectIndex = fChildEffectMap.count();
1594                 fChildEffectMap[var] = childEffectIndex;
1595                 continue;
1596             }
1597 
1598             // Opaque types include child processors and GL objects (samplers, textures, etc).
1599             // Of those, only child processors are legal variables.
1600             SkASSERT(!var->type().isVoid());
1601             SkASSERT(!var->type().isOpaque());
1602 
1603             // Builtin variables are system-defined, with special semantics.
1604             if (int builtin = var->layout().fBuiltin; builtin >= 0) {
1605                 if (builtin == SK_FRAGCOORD_BUILTIN) {
1606                     fBuilder.store_device_xy01(this->getVariableSlots(*var));
1607                     continue;
1608                 }
1609                 // The only builtin variable exposed to runtime effects is sk_FragCoord.
1610                 return unsupported();
1611             }
1612 
1613             if (IsUniform(*var)) {
1614                 // Create the uniform slot map in first-to-last order.
1615                 SlotRange uniformSlotRange = this->getUniformSlots(*var);
1616 
1617                 if (this->shouldWriteTraceOps()) {
1618                     // We expect uniform values to show up in the debug trace. To make this happen
1619                     // without updating the file format, we synthesize a value-slot range for the
1620                     // uniform here, and copy the uniform data into the value slots. This allows
1621                     // trace_var to work naturally. This wastes a bit of memory, but debug traces
1622                     // don't need to be hyper-efficient.
1623                     SlotRange copyRange = fProgramSlots.getVariableSlots(*var);
1624                     fBuilder.push_uniform(uniformSlotRange);
1625                     this->popToSlotRangeUnmasked(copyRange);
1626                 }
1627 
1628                 continue;
1629             }
1630 
1631             // Other globals are treated as normal variable declarations.
1632             if (!this->writeVarDeclaration(decl)) {
1633                 return unsupported();
1634             }
1635         }
1636     }
1637 
1638     return true;
1639 }
1640 
writeStatement(const Statement & s)1641 bool Generator::writeStatement(const Statement& s) {
1642     switch (s.kind()) {
1643         case Statement::Kind::kBlock:
1644             // The debugger will stop on statements inside Blocks; there's no need for an additional
1645             // stop on the block's initial open-brace.
1646         case Statement::Kind::kFor:
1647             // The debugger will stop on the init-statement of a for statement, so we don't need to
1648             // stop on the outer for-statement itself as well.
1649             break;
1650 
1651         default:
1652             // The debugger should stop on other statements.
1653             this->emitTraceLine(s.fPosition);
1654             break;
1655     }
1656 
1657     switch (s.kind()) {
1658         case Statement::Kind::kBlock:
1659             return this->writeBlock(s.as<Block>());
1660 
1661         case Statement::Kind::kBreak:
1662             return this->writeBreakStatement(s.as<BreakStatement>());
1663 
1664         case Statement::Kind::kContinue:
1665             return this->writeContinueStatement(s.as<ContinueStatement>());
1666 
1667         case Statement::Kind::kDo:
1668             return this->writeDoStatement(s.as<DoStatement>());
1669 
1670         case Statement::Kind::kExpression:
1671             return this->writeExpressionStatement(s.as<ExpressionStatement>());
1672 
1673         case Statement::Kind::kFor:
1674             return this->writeForStatement(s.as<ForStatement>());
1675 
1676         case Statement::Kind::kIf:
1677             return this->writeIfStatement(s.as<IfStatement>());
1678 
1679         case Statement::Kind::kNop:
1680             return true;
1681 
1682         case Statement::Kind::kReturn:
1683             return this->writeReturnStatement(s.as<ReturnStatement>());
1684 
1685         case Statement::Kind::kSwitch:
1686             return this->writeSwitchStatement(s.as<SwitchStatement>());
1687 
1688         case Statement::Kind::kVarDeclaration:
1689             return this->writeVarDeclaration(s.as<VarDeclaration>());
1690 
1691         default:
1692             return unsupported();
1693     }
1694 }
1695 
writeBlock(const Block & b)1696 bool Generator::writeBlock(const Block& b) {
1697     if (b.blockKind() == Block::Kind::kCompoundStatement) {
1698         this->emitTraceLine(b.fPosition);
1699         ++fInsideCompoundStatement;
1700     } else {
1701         this->pushTraceScopeMask();
1702         this->emitTraceScope(+1);
1703     }
1704 
1705     for (const std::unique_ptr<Statement>& stmt : b.children()) {
1706         if (!this->writeStatement(*stmt)) {
1707             return unsupported();
1708         }
1709     }
1710 
1711     if (b.blockKind() == Block::Kind::kCompoundStatement) {
1712         --fInsideCompoundStatement;
1713     } else {
1714         this->emitTraceScope(-1);
1715         this->discardTraceScopeMask();
1716     }
1717 
1718     return true;
1719 }
1720 
writeBreakStatement(const BreakStatement &)1721 bool Generator::writeBreakStatement(const BreakStatement&) {
1722     // If all lanes have reached this break, we can just branch straight to the break target instead
1723     // of updating masks.
1724     fBuilder.branch_if_all_lanes_active(fCurrentBreakTarget);
1725     fBuilder.mask_off_loop_mask();
1726     return true;
1727 }
1728 
writeContinueStatement(const ContinueStatement &)1729 bool Generator::writeContinueStatement(const ContinueStatement&) {
1730     fBuilder.continue_op(fCurrentContinueMask->stackID());
1731     return true;
1732 }
1733 
writeDoStatement(const DoStatement & d)1734 bool Generator::writeDoStatement(const DoStatement& d) {
1735     // Set up a break target.
1736     AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
1737 
1738     // Save off the original loop mask.
1739     fBuilder.enableExecutionMaskWrites();
1740     fBuilder.push_loop_mask();
1741 
1742     // If `continue` is used in the loop...
1743     Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*d.statement());
1744     AutoContinueMask autoContinueMask(this);
1745     if (loopInfo.fHasContinue) {
1746         // ... create a temporary slot for continue-mask storage.
1747         autoContinueMask.enable();
1748     }
1749 
1750     // Write the do-loop body.
1751     int labelID = fBuilder.nextLabelID();
1752     fBuilder.label(labelID);
1753 
1754     autoContinueMask.enterLoopBody();
1755 
1756     if (!this->writeStatement(*d.statement())) {
1757         return false;
1758     }
1759 
1760     autoContinueMask.exitLoopBody();
1761 
1762     // Point the debugger at the do-statement's test-expression before we run it.
1763     this->emitTraceLine(d.test()->fPosition);
1764 
1765     // Emit the test-expression, in order to combine it with the loop mask.
1766     if (!this->pushExpression(*d.test())) {
1767         return false;
1768     }
1769 
1770     // Mask off any lanes in the loop mask where the test-expression is false; this breaks the loop.
1771     // We don't use the test expression for anything else, so jettison it.
1772     fBuilder.merge_loop_mask();
1773     this->discardExpression(/*slots=*/1);
1774 
1775     // If any lanes are still running, go back to the top and run the loop body again.
1776     fBuilder.branch_if_any_lanes_active(labelID);
1777 
1778     // If we hit a break statement on all lanes, we will branch here to escape from the loop.
1779     fBuilder.label(breakTarget.labelID());
1780 
1781     // Restore the loop mask.
1782     fBuilder.pop_loop_mask();
1783     fBuilder.disableExecutionMaskWrites();
1784 
1785     return true;
1786 }
1787 
writeMasklessForStatement(const ForStatement & f)1788 bool Generator::writeMasklessForStatement(const ForStatement& f) {
1789     SkASSERT(f.unrollInfo());
1790     SkASSERT(f.unrollInfo()->fCount > 0);
1791     SkASSERT(f.initializer());
1792     SkASSERT(f.test());
1793     SkASSERT(f.next());
1794 
1795     // We want the loop index to disappear at the end of the loop, so wrap the for statement in a
1796     // trace scope.
1797     this->pushTraceScopeMask();
1798     this->emitTraceScope(+1);
1799 
1800     // If no lanes are active, skip over the loop entirely. This guards against looping forever;
1801     // with no lanes active, we wouldn't be able to write the loop variable back to its slot, so
1802     // we'd never make forward progress.
1803     int loopExitID = fBuilder.nextLabelID();
1804     int loopBodyID = fBuilder.nextLabelID();
1805     fBuilder.branch_if_no_lanes_active(loopExitID);
1806 
1807     // Run the loop initializer.
1808     if (!this->writeStatement(*f.initializer())) {
1809         return unsupported();
1810     }
1811 
1812     // Write the for-loop body. We know the for-loop has a standard ES2 unrollable structure, and
1813     // that it runs for at least one iteration, so we can plow straight ahead into the loop body
1814     // instead of running the loop-test first.
1815     fBuilder.label(loopBodyID);
1816 
1817     if (!this->writeStatement(*f.statement())) {
1818         return unsupported();
1819     }
1820 
1821     // Point the debugger at the for-statement's next-expression before we run it, or as close as we
1822     // can reasonably get.
1823     if (f.next()) {
1824         this->emitTraceLine(f.next()->fPosition);
1825     } else if (f.test()) {
1826         this->emitTraceLine(f.test()->fPosition);
1827     } else {
1828         this->emitTraceLine(f.fPosition);
1829     }
1830 
1831     // If the loop only runs for a single iteration, we are already done. If not...
1832     if (f.unrollInfo()->fCount > 1) {
1833         // ... run the next-expression, and immediately discard its result.
1834         if (!this->pushExpression(*f.next(), /*usesResult=*/false)) {
1835             return unsupported();
1836         }
1837         this->discardExpression(f.next()->type().slotCount());
1838 
1839         // Run the test-expression, and repeat the loop until the test-expression evaluates false.
1840         if (!this->pushExpression(*f.test())) {
1841             return unsupported();
1842         }
1843         fBuilder.branch_if_no_active_lanes_on_stack_top_equal(0, loopBodyID);
1844 
1845         // Jettison the test-expression.
1846         this->discardExpression(/*slots=*/1);
1847     }
1848 
1849     fBuilder.label(loopExitID);
1850 
1851     this->emitTraceScope(-1);
1852     this->discardTraceScopeMask();
1853     return true;
1854 }
1855 
writeForStatement(const ForStatement & f)1856 bool Generator::writeForStatement(const ForStatement& f) {
1857     // If we've determined that the loop does not run, omit its code entirely.
1858     if (f.unrollInfo() && f.unrollInfo()->fCount == 0) {
1859         return true;
1860     }
1861 
1862     // If the loop doesn't escape early due to a `continue`, `break` or `return`, and the loop
1863     // conforms to ES2 structure, we know that we will run the full number of iterations across all
1864     // lanes and don't need to use a loop mask.
1865     Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*f.statement());
1866     if (!loopInfo.fHasContinue && !loopInfo.fHasBreak && !loopInfo.fHasReturn && f.unrollInfo()) {
1867         return this->writeMasklessForStatement(f);
1868     }
1869 
1870     // We want the loop index to disappear at the end of the loop, so wrap the for statement in a
1871     // trace scope.
1872     this->pushTraceScopeMask();
1873     this->emitTraceScope(+1);
1874 
1875     // Set up a break target.
1876     AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
1877 
1878     // Run the loop initializer.
1879     if (f.initializer()) {
1880         if (!this->writeStatement(*f.initializer())) {
1881             return unsupported();
1882         }
1883     } else {
1884         this->emitTraceLine(f.fPosition);
1885     }
1886 
1887     AutoContinueMask autoContinueMask(this);
1888     if (loopInfo.fHasContinue) {
1889         // Acquire a temporary slot for continue-mask storage.
1890         autoContinueMask.enable();
1891     }
1892 
1893     // Save off the original loop mask.
1894     fBuilder.enableExecutionMaskWrites();
1895     fBuilder.push_loop_mask();
1896 
1897     int loopTestID = fBuilder.nextLabelID();
1898     int loopBodyID = fBuilder.nextLabelID();
1899 
1900     // Jump down to the loop test so we can fall out of the loop immediately if it's zero-iteration.
1901     fBuilder.jump(loopTestID);
1902 
1903     // Write the for-loop body.
1904     fBuilder.label(loopBodyID);
1905 
1906     autoContinueMask.enterLoopBody();
1907 
1908     if (!this->writeStatement(*f.statement())) {
1909         return unsupported();
1910     }
1911 
1912     autoContinueMask.exitLoopBody();
1913 
1914     // Point the debugger at the for-statement's next-expression before we run it, or as close as we
1915     // can reasonably get.
1916     if (f.next()) {
1917         this->emitTraceLine(f.next()->fPosition);
1918     } else if (f.test()) {
1919         this->emitTraceLine(f.test()->fPosition);
1920     } else {
1921         this->emitTraceLine(f.fPosition);
1922     }
1923 
1924     // Run the next-expression. Immediately discard its result.
1925     if (f.next()) {
1926         if (!this->pushExpression(*f.next(), /*usesResult=*/false)) {
1927             return unsupported();
1928         }
1929         this->discardExpression(f.next()->type().slotCount());
1930     }
1931 
1932     fBuilder.label(loopTestID);
1933     if (f.test()) {
1934         // Emit the test-expression, in order to combine it with the loop mask.
1935         if (!this->pushExpression(*f.test())) {
1936             return unsupported();
1937         }
1938         // Mask off any lanes in the loop mask where the test-expression is false; this breaks the
1939         // loop. We don't use the test expression for anything else, so jettison it.
1940         fBuilder.merge_loop_mask();
1941         this->discardExpression(/*slots=*/1);
1942     }
1943 
1944     // If any lanes are still running, go back to the top and run the loop body again.
1945     fBuilder.branch_if_any_lanes_active(loopBodyID);
1946 
1947     // If we hit a break statement on all lanes, we will branch here to escape from the loop.
1948     fBuilder.label(breakTarget.labelID());
1949 
1950     // Restore the loop mask.
1951     fBuilder.pop_loop_mask();
1952     fBuilder.disableExecutionMaskWrites();
1953 
1954     this->emitTraceScope(-1);
1955     this->discardTraceScopeMask();
1956     return true;
1957 }
1958 
writeExpressionStatement(const ExpressionStatement & e)1959 bool Generator::writeExpressionStatement(const ExpressionStatement& e) {
1960     if (!this->pushExpression(*e.expression(), /*usesResult=*/false)) {
1961         return unsupported();
1962     }
1963     this->discardExpression(e.expression()->type().slotCount());
1964     return true;
1965 }
1966 
writeDynamicallyUniformIfStatement(const IfStatement & i)1967 bool Generator::writeDynamicallyUniformIfStatement(const IfStatement& i) {
1968     SkASSERT(Analysis::IsDynamicallyUniformExpression(*i.test()));
1969 
1970     int falseLabelID = fBuilder.nextLabelID();
1971     int exitLabelID = fBuilder.nextLabelID();
1972 
1973     if (!this->pushExpression(*i.test())) {
1974         return unsupported();
1975     }
1976 
1977     fBuilder.branch_if_no_active_lanes_on_stack_top_equal(~0, falseLabelID);
1978 
1979     if (!this->writeStatement(*i.ifTrue())) {
1980         return unsupported();
1981     }
1982 
1983     if (!i.ifFalse()) {
1984         // We don't have an if-false condition at all.
1985         fBuilder.label(falseLabelID);
1986     } else {
1987         // We do have an if-false condition. We've just completed the if-true block, so we need to
1988         // jump past the if-false block to avoid executing it.
1989         fBuilder.jump(exitLabelID);
1990 
1991         // The if-false block starts here.
1992         fBuilder.label(falseLabelID);
1993 
1994         if (!this->writeStatement(*i.ifFalse())) {
1995             return unsupported();
1996         }
1997 
1998         fBuilder.label(exitLabelID);
1999     }
2000 
2001     // Jettison the test-expression.
2002     this->discardExpression(/*slots=*/1);
2003     return true;
2004 }
2005 
writeIfStatement(const IfStatement & i)2006 bool Generator::writeIfStatement(const IfStatement& i) {
2007     // If the test condition is known to be uniform, we can skip over the untrue portion entirely.
2008     if (Analysis::IsDynamicallyUniformExpression(*i.test())) {
2009         return this->writeDynamicallyUniformIfStatement(i);
2010     }
2011 
2012     // Save the current condition-mask.
2013     fBuilder.enableExecutionMaskWrites();
2014     fBuilder.push_condition_mask();
2015 
2016     // Push the test condition mask.
2017     if (!this->pushExpression(*i.test())) {
2018         return unsupported();
2019     }
2020 
2021     // Merge the current condition-mask with the test condition, then run the if-true branch.
2022     fBuilder.merge_condition_mask();
2023     if (!this->writeStatement(*i.ifTrue())) {
2024         return unsupported();
2025     }
2026 
2027     if (i.ifFalse()) {
2028         // Apply the inverse condition-mask. Then run the if-false branch.
2029         fBuilder.merge_inv_condition_mask();
2030         if (!this->writeStatement(*i.ifFalse())) {
2031             return unsupported();
2032         }
2033     }
2034 
2035     // Jettison the test-expression, and restore the the condition-mask.
2036     this->discardExpression(/*slots=*/1);
2037     fBuilder.pop_condition_mask();
2038     fBuilder.disableExecutionMaskWrites();
2039 
2040     return true;
2041 }
2042 
writeReturnStatement(const ReturnStatement & r)2043 bool Generator::writeReturnStatement(const ReturnStatement& r) {
2044     if (r.expression()) {
2045         if (!this->pushExpression(*r.expression())) {
2046             return unsupported();
2047         }
2048         if (this->needsFunctionResultSlots(fCurrentFunction)) {
2049             this->popToSlotRange(fCurrentFunctionResult);
2050         }
2051     }
2052     if (fBuilder.executionMaskWritesAreEnabled() && this->needsReturnMask(fCurrentFunction)) {
2053         fBuilder.mask_off_return_mask();
2054     }
2055     return true;
2056 }
2057 
writeSwitchStatement(const SwitchStatement & s)2058 bool Generator::writeSwitchStatement(const SwitchStatement& s) {
2059     const StatementArray& cases = s.cases();
2060     SkASSERT(std::all_of(cases.begin(), cases.end(), [](const std::unique_ptr<Statement>& stmt) {
2061         return stmt->is<SwitchCase>();
2062     }));
2063 
2064     // Set up a break target.
2065     AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
2066 
2067     // Save off the original loop mask.
2068     fBuilder.enableExecutionMaskWrites();
2069     fBuilder.push_loop_mask();
2070 
2071     // Push the switch-case value, and write a default-mask that enables every lane which already
2072     // has an active loop mask. As we match cases, the default mask will get pared down.
2073     if (!this->pushExpression(*s.value())) {
2074         return unsupported();
2075     }
2076     fBuilder.push_loop_mask();
2077 
2078     // Zero out the loop mask; each case op will re-enable it as we go.
2079     fBuilder.mask_off_loop_mask();
2080 
2081     // Write each switch-case.
2082     bool foundDefaultCase = false;
2083     for (const std::unique_ptr<Statement>& stmt : cases) {
2084         int skipLabelID = fBuilder.nextLabelID();
2085 
2086         const SwitchCase& sc = stmt->as<SwitchCase>();
2087         if (sc.isDefault()) {
2088             foundDefaultCase = true;
2089             if (stmt.get() != cases.back().get()) {
2090                 // We only support a default case when it is the very last case. If that changes,
2091                 // this logic will need to be updated.
2092                 return unsupported();
2093             }
2094             // Keep whatever lanes are executing now, and also enable any lanes in the default mask.
2095             fBuilder.pop_and_reenable_loop_mask();
2096             // Execute the switch-case block, if any lanes are alive to see it.
2097             fBuilder.branch_if_no_lanes_active(skipLabelID);
2098             if (!this->writeStatement(*sc.statement())) {
2099                 return unsupported();
2100             }
2101         } else {
2102             // The case-op will enable the loop mask if the switch-value matches, and mask off lanes
2103             // from the default-mask.
2104             fBuilder.case_op(sc.value());
2105             // Execute the switch-case block, if any lanes are alive to see it.
2106             fBuilder.branch_if_no_lanes_active(skipLabelID);
2107             if (!this->writeStatement(*sc.statement())) {
2108                 return unsupported();
2109             }
2110         }
2111         fBuilder.label(skipLabelID);
2112     }
2113 
2114     // Jettison the switch value, and the default case mask if it was never consumed above.
2115     this->discardExpression(/*slots=*/foundDefaultCase ? 1 : 2);
2116 
2117     // If we hit a break statement on all lanes, we will branch here to escape from the switch.
2118     fBuilder.label(breakTarget.labelID());
2119 
2120     // Restore the loop mask.
2121     fBuilder.pop_loop_mask();
2122     fBuilder.disableExecutionMaskWrites();
2123     return true;
2124 }
2125 
writeImmutableVarDeclaration(const VarDeclaration & d)2126 bool Generator::writeImmutableVarDeclaration(const VarDeclaration& d) {
2127     // In a debugging session, we expect debug traces for a variable declaration to appear, even if
2128     // it's constant, so we don't use immutable slots for variables when tracing is on.
2129     if (this->shouldWriteTraceOps()) {
2130         return false;
2131     }
2132 
2133     // Find the constant value for this variable.
2134     const Expression* initialValue = ConstantFolder::GetConstantValueForVariable(*d.value());
2135     SkASSERT(initialValue);
2136 
2137     // For a variable to be immutable, it cannot be written-to besides its initial declaration.
2138     ProgramUsage::VariableCounts counts = fProgram.fUsage->get(*d.var());
2139     if (counts.fWrite != 1) {
2140         return false;
2141     }
2142 
2143     STArray<16, ImmutableBits> immutableValues;
2144     if (!this->getImmutableValueForExpression(*initialValue, &immutableValues)) {
2145         return false;
2146     }
2147 
2148     fImmutableVariables.add(d.var());
2149 
2150     std::optional<SlotRange> preexistingSlots = this->findPreexistingImmutableData(immutableValues);
2151     if (preexistingSlots.has_value()) {
2152         // Associate this variable with a preexisting range of immutable data (no new data or code).
2153         fImmutableSlots.mapVariableToSlots(*d.var(), *preexistingSlots);
2154     } else {
2155         // Write out the constant value back to immutable slots. (This generates data, but no
2156         // runtime code.)
2157         SlotRange slots = this->getImmutableSlots(*d.var());
2158         this->storeImmutableValueToSlots(immutableValues, slots);
2159     }
2160 
2161     return true;
2162 }
2163 
writeVarDeclaration(const VarDeclaration & v)2164 bool Generator::writeVarDeclaration(const VarDeclaration& v) {
2165     if (v.value()) {
2166         // If a variable never actually changes, we can make it immutable.
2167         if (this->writeImmutableVarDeclaration(v)) {
2168             return true;
2169         }
2170         // This is a real variable which can change over the course of execution.
2171         if (!this->pushExpression(*v.value())) {
2172             return unsupported();
2173         }
2174         this->popToSlotRangeUnmasked(this->getVariableSlots(*v.var()));
2175     } else {
2176         this->zeroSlotRangeUnmasked(this->getVariableSlots(*v.var()));
2177     }
2178     return true;
2179 }
2180 
pushExpression(const Expression & e,bool usesResult)2181 bool Generator::pushExpression(const Expression& e, bool usesResult) {
2182     switch (e.kind()) {
2183         case Expression::Kind::kBinary:
2184             return this->pushBinaryExpression(e.as<BinaryExpression>());
2185 
2186         case Expression::Kind::kChildCall:
2187             return this->pushChildCall(e.as<ChildCall>());
2188 
2189         case Expression::Kind::kConstructorArray:
2190         case Expression::Kind::kConstructorArrayCast:
2191         case Expression::Kind::kConstructorCompound:
2192         case Expression::Kind::kConstructorStruct:
2193             return this->pushConstructorCompound(e.asAnyConstructor());
2194 
2195         case Expression::Kind::kConstructorCompoundCast:
2196         case Expression::Kind::kConstructorScalarCast:
2197             return this->pushConstructorCast(e.asAnyConstructor());
2198 
2199         case Expression::Kind::kConstructorDiagonalMatrix:
2200             return this->pushConstructorDiagonalMatrix(e.as<ConstructorDiagonalMatrix>());
2201 
2202         case Expression::Kind::kConstructorMatrixResize:
2203             return this->pushConstructorMatrixResize(e.as<ConstructorMatrixResize>());
2204 
2205         case Expression::Kind::kConstructorSplat:
2206             return this->pushConstructorSplat(e.as<ConstructorSplat>());
2207 
2208         case Expression::Kind::kEmpty:
2209             return true;
2210 
2211         case Expression::Kind::kFieldAccess:
2212             return this->pushFieldAccess(e.as<FieldAccess>());
2213 
2214         case Expression::Kind::kFunctionCall:
2215             return this->pushFunctionCall(e.as<FunctionCall>());
2216 
2217         case Expression::Kind::kIndex:
2218             return this->pushIndexExpression(e.as<IndexExpression>());
2219 
2220         case Expression::Kind::kLiteral:
2221             return this->pushLiteral(e.as<Literal>());
2222 
2223         case Expression::Kind::kPrefix:
2224             return this->pushPrefixExpression(e.as<PrefixExpression>());
2225 
2226         case Expression::Kind::kPostfix:
2227             return this->pushPostfixExpression(e.as<PostfixExpression>(), usesResult);
2228 
2229         case Expression::Kind::kSwizzle:
2230             return this->pushSwizzle(e.as<Swizzle>());
2231 
2232         case Expression::Kind::kTernary:
2233             return this->pushTernaryExpression(e.as<TernaryExpression>());
2234 
2235         case Expression::Kind::kVariableReference:
2236             return this->pushVariableReference(e.as<VariableReference>());
2237 
2238         default:
2239             return unsupported();
2240     }
2241 }
2242 
GetTypedOp(const SkSL::Type & type,const TypedOps & ops)2243 BuilderOp Generator::GetTypedOp(const SkSL::Type& type, const TypedOps& ops) {
2244     switch (type.componentType().numberKind()) {
2245         case Type::NumberKind::kFloat:    return ops.fFloatOp;
2246         case Type::NumberKind::kSigned:   return ops.fSignedOp;
2247         case Type::NumberKind::kUnsigned: return ops.fUnsignedOp;
2248         case Type::NumberKind::kBoolean:  return ops.fBooleanOp;
2249         default:                          return BuilderOp::unsupported;
2250     }
2251 }
2252 
unaryOp(const SkSL::Type & type,const TypedOps & ops)2253 bool Generator::unaryOp(const SkSL::Type& type, const TypedOps& ops) {
2254     BuilderOp op = GetTypedOp(type, ops);
2255     if (op == BuilderOp::unsupported) {
2256         return unsupported();
2257     }
2258     fBuilder.unary_op(op, type.slotCount());
2259     return true;
2260 }
2261 
binaryOp(const SkSL::Type & type,const TypedOps & ops)2262 bool Generator::binaryOp(const SkSL::Type& type, const TypedOps& ops) {
2263     BuilderOp op = GetTypedOp(type, ops);
2264     if (op == BuilderOp::unsupported) {
2265         return unsupported();
2266     }
2267     fBuilder.binary_op(op, type.slotCount());
2268     return true;
2269 }
2270 
ternaryOp(const SkSL::Type & type,const TypedOps & ops)2271 bool Generator::ternaryOp(const SkSL::Type& type, const TypedOps& ops) {
2272     BuilderOp op = GetTypedOp(type, ops);
2273     if (op == BuilderOp::unsupported) {
2274         return unsupported();
2275     }
2276     fBuilder.ternary_op(op, type.slotCount());
2277     return true;
2278 }
2279 
foldWithMultiOp(BuilderOp op,int elements)2280 void Generator::foldWithMultiOp(BuilderOp op, int elements) {
2281     // Fold the top N elements on the stack using an op that supports multiple slots, e.g.:
2282     // (A + B + C + D) -> add_2_floats $0..1 += $2..3
2283     //                    add_float    $0    += $1
2284     for (; elements >= 8; elements -= 4) {
2285         fBuilder.binary_op(op, /*slots=*/4);
2286     }
2287     for (; elements >= 6; elements -= 3) {
2288         fBuilder.binary_op(op, /*slots=*/3);
2289     }
2290     for (; elements >= 4; elements -= 2) {
2291         fBuilder.binary_op(op, /*slots=*/2);
2292     }
2293     for (; elements >= 2; elements -= 1) {
2294         fBuilder.binary_op(op, /*slots=*/1);
2295     }
2296 }
2297 
pushLValueOrExpression(LValue * lvalue,const Expression & expr)2298 bool Generator::pushLValueOrExpression(LValue* lvalue, const Expression& expr) {
2299     return lvalue ? this->push(*lvalue)
2300                   : this->pushExpression(expr);
2301 }
2302 
pushMatrixMultiply(LValue * lvalue,const Expression & left,const Expression & right,int leftColumns,int leftRows,int rightColumns,int rightRows)2303 bool Generator::pushMatrixMultiply(LValue* lvalue,
2304                                    const Expression& left,
2305                                    const Expression& right,
2306                                    int leftColumns,
2307                                    int leftRows,
2308                                    int rightColumns,
2309                                    int rightRows) {
2310     SkASSERT(left.type().isMatrix() || left.type().isVector());
2311     SkASSERT(right.type().isMatrix() || right.type().isVector());
2312 
2313     // Insert padding space on the stack to hold the result.
2314     fBuilder.pad_stack(rightColumns * leftRows);
2315 
2316     // Push the left and right matrices onto the stack.
2317     if (!this->pushLValueOrExpression(lvalue, left) || !this->pushExpression(right)) {
2318         return unsupported();
2319     }
2320 
2321     fBuilder.matrix_multiply(leftColumns, leftRows, rightColumns, rightRows);
2322 
2323     // If this multiply was actually an assignment (via *=), write the result back to the lvalue.
2324     return lvalue ? this->store(*lvalue)
2325                   : true;
2326 }
2327 
foldComparisonOp(Operator op,int elements)2328 void Generator::foldComparisonOp(Operator op, int elements) {
2329     switch (op.kind()) {
2330         case OperatorKind::EQEQ:
2331             // equal(x,y) returns a vector; use & to fold into a scalar.
2332             this->foldWithMultiOp(BuilderOp::bitwise_and_n_ints, elements);
2333             break;
2334 
2335         case OperatorKind::NEQ:
2336             // notEqual(x,y) returns a vector; use | to fold into a scalar.
2337             this->foldWithMultiOp(BuilderOp::bitwise_or_n_ints, elements);
2338             break;
2339 
2340         default:
2341             SkDEBUGFAIL("comparison only allows == and !=");
2342             break;
2343     }
2344 }
2345 
pushStructuredComparison(LValue * left,Operator op,LValue * right,const Type & type)2346 bool Generator::pushStructuredComparison(LValue* left,
2347                                          Operator op,
2348                                          LValue* right,
2349                                          const Type& type) {
2350     if (type.isStruct()) {
2351         // Compare every field in the struct.
2352         SkSpan<const Field> fields = type.fields();
2353         int currentSlot = 0;
2354         for (size_t index = 0; index < fields.size(); ++index) {
2355             const Type& fieldType = *fields[index].fType;
2356             const int   fieldSlotCount = fieldType.slotCount();
2357             UnownedLValueSlice fieldLeft {left,  currentSlot, fieldSlotCount};
2358             UnownedLValueSlice fieldRight{right, currentSlot, fieldSlotCount};
2359             if (!this->pushStructuredComparison(&fieldLeft, op, &fieldRight, fieldType)) {
2360                 return unsupported();
2361             }
2362             currentSlot += fieldSlotCount;
2363         }
2364 
2365         this->foldComparisonOp(op, fields.size());
2366         return true;
2367     }
2368 
2369     if (type.isArray()) {
2370         const Type& indexedType = type.componentType();
2371         if (indexedType.numberKind() == Type::NumberKind::kNonnumeric) {
2372             // Compare every element in the array.
2373             const int indexedSlotCount = indexedType.slotCount();
2374             int       currentSlot = 0;
2375             for (int index = 0; index < type.columns(); ++index) {
2376                 UnownedLValueSlice indexedLeft {left,  currentSlot, indexedSlotCount};
2377                 UnownedLValueSlice indexedRight{right, currentSlot, indexedSlotCount};
2378                 if (!this->pushStructuredComparison(&indexedLeft, op, &indexedRight, indexedType)) {
2379                     return unsupported();
2380                 }
2381                 currentSlot += indexedSlotCount;
2382             }
2383 
2384             this->foldComparisonOp(op, type.columns());
2385             return true;
2386         }
2387     }
2388 
2389     // We've winnowed down to a single element, or an array of homogeneous numeric elements.
2390     // Push the elements onto the stack, then compare them.
2391     if (!this->push(*left) || !this->push(*right)) {
2392         return unsupported();
2393     }
2394     switch (op.kind()) {
2395         case OperatorKind::EQEQ:
2396             if (!this->binaryOp(type, kEqualOps)) {
2397                 return unsupported();
2398             }
2399             break;
2400 
2401         case OperatorKind::NEQ:
2402             if (!this->binaryOp(type, kNotEqualOps)) {
2403                 return unsupported();
2404             }
2405             break;
2406 
2407         default:
2408             SkDEBUGFAIL("comparison only allows == and !=");
2409             break;
2410     }
2411 
2412     this->foldComparisonOp(op, type.slotCount());
2413     return true;
2414 }
2415 
pushBinaryExpression(const BinaryExpression & e)2416 bool Generator::pushBinaryExpression(const BinaryExpression& e) {
2417     return this->pushBinaryExpression(*e.left(), e.getOperator(), *e.right());
2418 }
2419 
pushBinaryExpression(const Expression & left,Operator op,const Expression & right)2420 bool Generator::pushBinaryExpression(const Expression& left, Operator op, const Expression& right) {
2421     switch (op.kind()) {
2422         // Rewrite greater-than ops as their less-than equivalents.
2423         case OperatorKind::GT:
2424             return this->pushBinaryExpression(right, OperatorKind::LT, left);
2425 
2426         case OperatorKind::GTEQ:
2427             return this->pushBinaryExpression(right, OperatorKind::LTEQ, left);
2428 
2429         // Handle struct and array comparisons.
2430         case OperatorKind::EQEQ:
2431         case OperatorKind::NEQ:
2432             if (left.type().isStruct() || left.type().isArray()) {
2433                 SkASSERT(left.type().matches(right.type()));
2434                 std::unique_ptr<LValue> lvLeft = this->makeLValue(left, /*allowScratch=*/true);
2435                 std::unique_ptr<LValue> lvRight = this->makeLValue(right, /*allowScratch=*/true);
2436                 return this->pushStructuredComparison(lvLeft.get(), op, lvRight.get(), left.type());
2437             }
2438             [[fallthrough]];
2439 
2440         // Rewrite commutative ops so that the literal is on the right-hand side. This gives the
2441         // Builder more opportunities to use immediate-mode ops.
2442         case OperatorKind::PLUS:
2443         case OperatorKind::STAR:
2444         case OperatorKind::BITWISEAND:
2445         case OperatorKind::BITWISEXOR:
2446         case OperatorKind::LOGICALXOR: {
2447             double unused;
2448             if (ConstantFolder::GetConstantValue(left, &unused) &&
2449                 !ConstantFolder::GetConstantValue(right, &unused)) {
2450                 return this->pushBinaryExpression(right, op, left);
2451             }
2452             break;
2453         }
2454         // Emit comma expressions.
2455         case OperatorKind::COMMA:
2456             if (Analysis::HasSideEffects(left)) {
2457                 if (!this->pushExpression(left, /*usesResult=*/false)) {
2458                     return unsupported();
2459                 }
2460                 this->discardExpression(left.type().slotCount());
2461             }
2462             return this->pushExpression(right);
2463 
2464         default:
2465             break;
2466     }
2467 
2468     // Handle binary expressions with mismatched types.
2469     bool vectorizeLeft = false, vectorizeRight = false;
2470     if (!left.type().matches(right.type())) {
2471         if (left.type().componentType().numberKind() != right.type().componentType().numberKind()) {
2472             return unsupported();
2473         }
2474         if (left.type().isScalar() && (right.type().isVector() || right.type().isMatrix())) {
2475             vectorizeLeft = true;
2476         } else if ((left.type().isVector() || left.type().isMatrix()) && right.type().isScalar()) {
2477             vectorizeRight = true;
2478         }
2479     }
2480 
2481     const Type& type = vectorizeLeft ? right.type() : left.type();
2482 
2483     // If this is an assignment...
2484     std::unique_ptr<LValue> lvalue;
2485     if (op.isAssignment()) {
2486         // ... turn the left side into an lvalue.
2487         lvalue = this->makeLValue(left);
2488         if (!lvalue) {
2489             return unsupported();
2490         }
2491 
2492         // Handle simple assignment (`var = expr`).
2493         if (op.kind() == OperatorKind::EQ) {
2494             return this->pushExpression(right) &&
2495                    this->store(*lvalue);
2496         }
2497 
2498         // Strip off the assignment from the op (turning += into +).
2499         op = op.removeAssignment();
2500     }
2501 
2502     // Handle matrix multiplication (MxM/MxV/VxM).
2503     if (op.kind() == OperatorKind::STAR) {
2504         // Matrix * matrix:
2505         if (left.type().isMatrix() && right.type().isMatrix()) {
2506             return this->pushMatrixMultiply(lvalue.get(), left, right,
2507                                             left.type().columns(), left.type().rows(),
2508                                             right.type().columns(), right.type().rows());
2509         }
2510 
2511         // Vector * matrix:
2512         if (left.type().isVector() && right.type().isMatrix()) {
2513             return this->pushMatrixMultiply(lvalue.get(), left, right,
2514                                             left.type().columns(), 1,
2515                                             right.type().columns(), right.type().rows());
2516         }
2517 
2518         // Matrix * vector:
2519         if (left.type().isMatrix() && right.type().isVector()) {
2520             return this->pushMatrixMultiply(lvalue.get(), left, right,
2521                                             left.type().columns(), left.type().rows(),
2522                                             1, right.type().columns());
2523         }
2524     }
2525 
2526     if (!vectorizeLeft && !vectorizeRight && !type.matches(right.type())) {
2527         // We have mismatched types but don't know how to handle them.
2528         return unsupported();
2529     }
2530 
2531     // Handle binary ops which require short-circuiting.
2532     switch (op.kind()) {
2533         case OperatorKind::LOGICALAND:
2534             if (Analysis::HasSideEffects(right)) {
2535                 // If the RHS has side effects, we rewrite `a && b` as `a ? b : false`. This
2536                 // generates pretty solid code and gives us the required short-circuit behavior.
2537                 SkASSERT(!op.isAssignment());
2538                 SkASSERT(type.componentType().isBoolean());
2539                 SkASSERT(type.slotCount() == 1);  // operator&& only works with scalar types
2540                 Literal falseLiteral{Position{}, 0.0, &right.type()};
2541                 return this->pushTernaryExpression(left, right, falseLiteral);
2542             }
2543             break;
2544 
2545         case OperatorKind::LOGICALOR:
2546             if (Analysis::HasSideEffects(right)) {
2547                 // If the RHS has side effects, we rewrite `a || b` as `a ? true : b`.
2548                 SkASSERT(!op.isAssignment());
2549                 SkASSERT(type.componentType().isBoolean());
2550                 SkASSERT(type.slotCount() == 1);  // operator|| only works with scalar types
2551                 Literal trueLiteral{Position{}, 1.0, &right.type()};
2552                 return this->pushTernaryExpression(left, trueLiteral, right);
2553             }
2554             break;
2555 
2556         default:
2557             break;
2558     }
2559 
2560     // Push the left- and right-expressions onto the stack.
2561     if (!this->pushLValueOrExpression(lvalue.get(), left)) {
2562         return unsupported();
2563     }
2564     if (vectorizeLeft) {
2565         fBuilder.push_duplicates(right.type().slotCount() - 1);
2566     }
2567     if (!this->pushExpression(right)) {
2568         return unsupported();
2569     }
2570     if (vectorizeRight) {
2571         fBuilder.push_duplicates(left.type().slotCount() - 1);
2572     }
2573 
2574     switch (op.kind()) {
2575         case OperatorKind::PLUS:
2576             if (!this->binaryOp(type, kAddOps)) {
2577                 return unsupported();
2578             }
2579             break;
2580 
2581         case OperatorKind::MINUS:
2582             if (!this->binaryOp(type, kSubtractOps)) {
2583                 return unsupported();
2584             }
2585             break;
2586 
2587         case OperatorKind::STAR:
2588             if (!this->binaryOp(type, kMultiplyOps)) {
2589                 return unsupported();
2590             }
2591             break;
2592 
2593         case OperatorKind::SLASH:
2594             if (!this->binaryOp(type, kDivideOps)) {
2595                 return unsupported();
2596             }
2597             break;
2598 
2599         case OperatorKind::LT:
2600         case OperatorKind::GT:
2601             if (!this->binaryOp(type, kLessThanOps)) {
2602                 return unsupported();
2603             }
2604             SkASSERT(type.slotCount() == 1);  // operator< only works with scalar types
2605             break;
2606 
2607         case OperatorKind::LTEQ:
2608         case OperatorKind::GTEQ:
2609             if (!this->binaryOp(type, kLessThanEqualOps)) {
2610                 return unsupported();
2611             }
2612             SkASSERT(type.slotCount() == 1);  // operator<= only works with scalar types
2613             break;
2614 
2615         case OperatorKind::EQEQ:
2616             if (!this->binaryOp(type, kEqualOps)) {
2617                 return unsupported();
2618             }
2619             this->foldComparisonOp(op, type.slotCount());
2620             break;
2621 
2622         case OperatorKind::NEQ:
2623             if (!this->binaryOp(type, kNotEqualOps)) {
2624                 return unsupported();
2625             }
2626             this->foldComparisonOp(op, type.slotCount());
2627             break;
2628 
2629         case OperatorKind::LOGICALAND:
2630         case OperatorKind::BITWISEAND:
2631             // For logical-and, we verified above that the RHS does not have side effects, so we
2632             // don't need to worry about short-circuiting side effects.
2633             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, type.slotCount());
2634             break;
2635 
2636         case OperatorKind::LOGICALOR:
2637         case OperatorKind::BITWISEOR:
2638             // For logical-or, we verified above that the RHS does not have side effects.
2639             fBuilder.binary_op(BuilderOp::bitwise_or_n_ints, type.slotCount());
2640             break;
2641 
2642         case OperatorKind::LOGICALXOR:
2643         case OperatorKind::BITWISEXOR:
2644             // Logical-xor does not short circuit.
2645             fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, type.slotCount());
2646             break;
2647 
2648         default:
2649             return unsupported();
2650     }
2651 
2652     // If we have an lvalue, we need to write the result back into it.
2653     return lvalue ? this->store(*lvalue)
2654                   : true;
2655 }
2656 
getImmutableBitsForSlot(const Expression & expr,size_t slot)2657 std::optional<Generator::ImmutableBits> Generator::getImmutableBitsForSlot(const Expression& expr,
2658                                                                            size_t slot) {
2659     // Determine the constant-value of the slot; bail if it isn't constant.
2660     std::optional<double> v = expr.getConstantValue(slot);
2661     if (!v.has_value()) {
2662         return std::nullopt;
2663     }
2664     // Determine the number-kind of the slot, and convert the value to its bit-representation.
2665     Type::NumberKind kind = expr.type().slotType(slot).numberKind();
2666     double value = *v;
2667     switch (kind) {
2668         case Type::NumberKind::kFloat:
2669             return sk_bit_cast<ImmutableBits>((float)value);
2670 
2671         case Type::NumberKind::kSigned:
2672             return sk_bit_cast<ImmutableBits>((int32_t)value);
2673 
2674         case Type::NumberKind::kUnsigned:
2675             return sk_bit_cast<ImmutableBits>((uint32_t)value);
2676 
2677         case Type::NumberKind::kBoolean:
2678             return value ? ~0 : 0;
2679 
2680         default:
2681             return std::nullopt;
2682     }
2683 }
2684 
getImmutableValueForExpression(const Expression & expr,TArray<ImmutableBits> * immutableValues)2685 bool Generator::getImmutableValueForExpression(const Expression& expr,
2686                                                TArray<ImmutableBits>* immutableValues) {
2687     if (!expr.supportsConstantValues()) {
2688         return false;
2689     }
2690     size_t numSlots = expr.type().slotCount();
2691     immutableValues->reserve_exact(numSlots);
2692     for (size_t index = 0; index < numSlots; ++index) {
2693         std::optional<ImmutableBits> bits = this->getImmutableBitsForSlot(expr, index);
2694         if (!bits.has_value()) {
2695             return false;
2696         }
2697         immutableValues->push_back(*bits);
2698     }
2699     return true;
2700 }
2701 
storeImmutableValueToSlots(const TArray<ImmutableBits> & immutableValues,SlotRange slots)2702 void Generator::storeImmutableValueToSlots(const TArray<ImmutableBits>& immutableValues,
2703                                            SlotRange slots) {
2704     for (int index = 0; index < slots.count; ++index) {
2705         // Store the immutable value in its slot.
2706         const Slot slot = slots.index++;
2707         const ImmutableBits bits = immutableValues[index];
2708         fBuilder.store_immutable_value_i(slot, bits);
2709 
2710         // Keep track of every stored immutable value for potential later reuse.
2711         fImmutableSlotMap[bits].add(slot);
2712     }
2713 }
2714 
findPreexistingImmutableData(const TArray<ImmutableBits> & immutableValues)2715 std::optional<SlotRange> Generator::findPreexistingImmutableData(
2716         const TArray<ImmutableBits>& immutableValues) {
2717     STArray<16, const THashSet<Slot>*> slotArray;
2718     slotArray.reserve_exact(immutableValues.size());
2719 
2720     // Find all the slots associated with each immutable-value bit representation.
2721     // If a given bit-pattern doesn't exist anywhere in our program yet, we can stop searching.
2722     for (const ImmutableBits& immutableValue : immutableValues) {
2723         const THashSet<Slot>* slotsForValue = fImmutableSlotMap.find(immutableValue);
2724         if (!slotsForValue) {
2725             return std::nullopt;
2726         }
2727         slotArray.push_back(slotsForValue);
2728     }
2729 
2730     // Look for the group with the fewest number of entries, since that can be searched in the
2731     // least amount of effort.
2732     int leastSlotIndex = 0, leastSlotCount = INT_MAX;
2733     for (int index = 0; index < slotArray.size(); ++index) {
2734         int currentCount = slotArray[index]->count();
2735         if (currentCount < leastSlotCount) {
2736             leastSlotIndex = index;
2737             leastSlotCount = currentCount;
2738         }
2739     }
2740 
2741     // See if we can reconstitute the value that we want with any of the data we've already got.
2742     for (int slot : *slotArray[leastSlotIndex]) {
2743         int firstSlot = slot - leastSlotIndex;
2744         bool found = true;
2745         for (int index = 0; index < slotArray.size(); ++index) {
2746             if (!slotArray[index]->contains(firstSlot + index)) {
2747                 found = false;
2748                 break;
2749             }
2750         }
2751         if (found) {
2752             // We've found an exact match for the input value; return its slot-range.
2753             return SlotRange{firstSlot, slotArray.size()};
2754         }
2755     }
2756 
2757     // We didn't find any reusable slot ranges.
2758     return std::nullopt;
2759 }
2760 
pushImmutableData(const Expression & e)2761 bool Generator::pushImmutableData(const Expression& e) {
2762     STArray<16, ImmutableBits> immutableValues;
2763     if (!this->getImmutableValueForExpression(e, &immutableValues)) {
2764         return false;
2765     }
2766     std::optional<SlotRange> preexistingData = this->findPreexistingImmutableData(immutableValues);
2767     if (preexistingData.has_value()) {
2768         fBuilder.push_immutable(*preexistingData);
2769         return true;
2770     }
2771     SlotRange range = fImmutableSlots.createSlots(e.description(),
2772                                                   e.type(),
2773                                                   e.fPosition,
2774                                                   /*isFunctionReturnValue=*/false);
2775     this->storeImmutableValueToSlots(immutableValues, range);
2776     fBuilder.push_immutable(range);
2777     return true;
2778 }
2779 
pushConstructorCompound(const AnyConstructor & c)2780 bool Generator::pushConstructorCompound(const AnyConstructor& c) {
2781     if (c.type().slotCount() > 1 && this->pushImmutableData(c)) {
2782         return true;
2783     }
2784     for (const std::unique_ptr<Expression> &arg : c.argumentSpan()) {
2785         if (!this->pushExpression(*arg)) {
2786             return unsupported();
2787         }
2788     }
2789     return true;
2790 }
2791 
pushChildCall(const ChildCall & c)2792 bool Generator::pushChildCall(const ChildCall& c) {
2793     int* childIdx = fChildEffectMap.find(&c.child());
2794     SkASSERT(childIdx != nullptr);
2795     SkASSERT(!c.arguments().empty());
2796 
2797     // All child calls have at least one argument.
2798     const Expression* arg = c.arguments()[0].get();
2799     if (!this->pushExpression(*arg)) {
2800         return unsupported();
2801     }
2802 
2803     // Copy arguments from the stack into src/dst as required by this particular child-call.
2804     switch (c.child().type().typeKind()) {
2805         case Type::TypeKind::kShader: {
2806             // The argument must be a float2.
2807             SkASSERT(c.arguments().size() == 1);
2808             SkASSERT(arg->type().matches(*fContext.fTypes.fFloat2));
2809 
2810             // `exchange_src` will use the top four values on the stack, but we don't care what goes
2811             // into the blue/alpha components. We inject padding here to balance the stack.
2812             fBuilder.pad_stack(2);
2813 
2814             // Move the argument into src.rgba while also preserving the execution mask.
2815             fBuilder.exchange_src();
2816             fBuilder.invoke_shader(*childIdx);
2817             break;
2818         }
2819         case Type::TypeKind::kColorFilter: {
2820             // The argument must be a half4/float4.
2821             SkASSERT(c.arguments().size() == 1);
2822             SkASSERT(arg->type().matches(*fContext.fTypes.fHalf4) ||
2823                      arg->type().matches(*fContext.fTypes.fFloat4));
2824 
2825             // Move the argument into src.rgba while also preserving the execution mask.
2826             fBuilder.exchange_src();
2827             fBuilder.invoke_color_filter(*childIdx);
2828             break;
2829         }
2830         case Type::TypeKind::kBlender: {
2831             // Both arguments must be half4/float4.
2832             SkASSERT(c.arguments().size() == 2);
2833             SkASSERT(c.arguments()[0]->type().matches(*fContext.fTypes.fHalf4) ||
2834                      c.arguments()[0]->type().matches(*fContext.fTypes.fFloat4));
2835             SkASSERT(c.arguments()[1]->type().matches(*fContext.fTypes.fHalf4) ||
2836                      c.arguments()[1]->type().matches(*fContext.fTypes.fFloat4));
2837 
2838             // Move the second argument into dst.rgba, and the first argument into src.rgba, while
2839             // simultaneously preserving the execution mask.
2840             if (!this->pushExpression(*c.arguments()[1])) {
2841                 return unsupported();
2842             }
2843             fBuilder.pop_dst_rgba();
2844             fBuilder.exchange_src();
2845             fBuilder.invoke_blender(*childIdx);
2846             break;
2847         }
2848         default: {
2849             SkDEBUGFAILF("cannot sample from type '%s'", c.child().type().description().c_str());
2850         }
2851     }
2852 
2853     // The child call has returned the result color via src.rgba, and the SkRP execution mask is
2854     // on top of the stack. Swapping the two puts the result color on top of the stack, and also
2855     // restores our execution masks.
2856     fBuilder.exchange_src();
2857     return true;
2858 }
2859 
pushConstructorCast(const AnyConstructor & c)2860 bool Generator::pushConstructorCast(const AnyConstructor& c) {
2861     SkASSERT(c.argumentSpan().size() == 1);
2862     const Expression& inner = *c.argumentSpan().front();
2863     SkASSERT(inner.type().slotCount() == c.type().slotCount());
2864 
2865     if (!this->pushExpression(inner)) {
2866         return unsupported();
2867     }
2868     const Type::NumberKind innerKind = inner.type().componentType().numberKind();
2869     const Type::NumberKind outerKind = c.type().componentType().numberKind();
2870 
2871     if (innerKind == outerKind) {
2872         // Since we ignore type precision, this cast is effectively a no-op.
2873         return true;
2874     }
2875 
2876     switch (innerKind) {
2877         case Type::NumberKind::kSigned:
2878             if (outerKind == Type::NumberKind::kUnsigned) {
2879                 // Treat uint(int) as a no-op.
2880                 return true;
2881             }
2882             if (outerKind == Type::NumberKind::kFloat) {
2883                 fBuilder.unary_op(BuilderOp::cast_to_float_from_int, c.type().slotCount());
2884                 return true;
2885             }
2886             break;
2887 
2888         case Type::NumberKind::kUnsigned:
2889             if (outerKind == Type::NumberKind::kSigned) {
2890                 // Treat int(uint) as a no-op.
2891                 return true;
2892             }
2893             if (outerKind == Type::NumberKind::kFloat) {
2894                 fBuilder.unary_op(BuilderOp::cast_to_float_from_uint, c.type().slotCount());
2895                 return true;
2896             }
2897             break;
2898 
2899         case Type::NumberKind::kBoolean:
2900             // Converting boolean to int or float can be accomplished via bitwise-and.
2901             if (outerKind == Type::NumberKind::kFloat) {
2902                 fBuilder.push_constant_f(1.0f);
2903             } else if (outerKind == Type::NumberKind::kSigned ||
2904                        outerKind == Type::NumberKind::kUnsigned) {
2905                 fBuilder.push_constant_i(1);
2906             } else {
2907                 SkDEBUGFAILF("unexpected cast from bool to %s", c.type().description().c_str());
2908                 return unsupported();
2909             }
2910             fBuilder.push_duplicates(c.type().slotCount() - 1);
2911             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, c.type().slotCount());
2912             return true;
2913 
2914         case Type::NumberKind::kFloat:
2915             if (outerKind == Type::NumberKind::kSigned) {
2916                 fBuilder.unary_op(BuilderOp::cast_to_int_from_float, c.type().slotCount());
2917                 return true;
2918             }
2919             if (outerKind == Type::NumberKind::kUnsigned) {
2920                 fBuilder.unary_op(BuilderOp::cast_to_uint_from_float, c.type().slotCount());
2921                 return true;
2922             }
2923             break;
2924 
2925         case Type::NumberKind::kNonnumeric:
2926             break;
2927     }
2928 
2929     if (outerKind == Type::NumberKind::kBoolean) {
2930         // Converting int or float to boolean can be accomplished via `notEqual(x, 0)`.
2931         fBuilder.push_zeros(c.type().slotCount());
2932         return this->binaryOp(inner.type(), kNotEqualOps);
2933     }
2934 
2935     SkDEBUGFAILF("unexpected cast from %s to %s",
2936                  c.type().description().c_str(), inner.type().description().c_str());
2937     return unsupported();
2938 }
2939 
pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix & c)2940 bool Generator::pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix& c) {
2941     if (this->pushImmutableData(c)) {
2942         return true;
2943     }
2944     fBuilder.push_zeros(1);
2945     if (!this->pushExpression(*c.argument())) {
2946         return unsupported();
2947     }
2948     fBuilder.diagonal_matrix(c.type().columns(), c.type().rows());
2949 
2950     return true;
2951 }
2952 
pushConstructorMatrixResize(const ConstructorMatrixResize & c)2953 bool Generator::pushConstructorMatrixResize(const ConstructorMatrixResize& c) {
2954     if (!this->pushExpression(*c.argument())) {
2955         return unsupported();
2956     }
2957     fBuilder.matrix_resize(c.argument()->type().columns(),
2958                            c.argument()->type().rows(),
2959                            c.type().columns(),
2960                            c.type().rows());
2961     return true;
2962 }
2963 
pushConstructorSplat(const ConstructorSplat & c)2964 bool Generator::pushConstructorSplat(const ConstructorSplat& c) {
2965     if (!this->pushExpression(*c.argument())) {
2966         return unsupported();
2967     }
2968     fBuilder.push_duplicates(c.type().slotCount() - 1);
2969     return true;
2970 }
2971 
pushFieldAccess(const FieldAccess & f)2972 bool Generator::pushFieldAccess(const FieldAccess& f) {
2973     // If possible, get direct field access via the lvalue.
2974     std::unique_ptr<LValue> lvalue = this->makeLValue(f, /*allowScratch=*/true);
2975     return lvalue && this->push(*lvalue);
2976 }
2977 
pushFunctionCall(const FunctionCall & c)2978 bool Generator::pushFunctionCall(const FunctionCall& c) {
2979     if (c.function().isIntrinsic()) {
2980         return this->pushIntrinsic(c);
2981     }
2982 
2983     // Keep track of the current function.
2984     const FunctionDefinition* lastFunction = fCurrentFunction;
2985     fCurrentFunction = c.function().definition();
2986 
2987     // Skip over the function body entirely if there are no active lanes.
2988     // (If the function call was trivial, it would likely have been inlined in the frontend, so we
2989     // assume here that function calls generally represent a significant amount of work.)
2990     int skipLabelID = fBuilder.nextLabelID();
2991     fBuilder.branch_if_no_lanes_active(skipLabelID);
2992 
2993     // Emit the function body.
2994     std::optional<SlotRange> r = this->writeFunction(c, *fCurrentFunction, c.arguments());
2995     if (!r.has_value()) {
2996         return unsupported();
2997     }
2998 
2999     // If the function uses result slots, move its result from slots onto the stack.
3000     if (this->needsFunctionResultSlots(fCurrentFunction)) {
3001         fBuilder.push_slots(*r);
3002     }
3003 
3004     // We've returned back to the last function.
3005     fCurrentFunction = lastFunction;
3006 
3007     // Copy the function result from its slots onto the stack.
3008     fBuilder.label(skipLabelID);
3009     return true;
3010 }
3011 
pushIndexExpression(const IndexExpression & i)3012 bool Generator::pushIndexExpression(const IndexExpression& i) {
3013     std::unique_ptr<LValue> lvalue = this->makeLValue(i, /*allowScratch=*/true);
3014     return lvalue && this->push(*lvalue);
3015 }
3016 
pushIntrinsic(const FunctionCall & c)3017 bool Generator::pushIntrinsic(const FunctionCall& c) {
3018     const ExpressionArray& args = c.arguments();
3019     switch (args.size()) {
3020         case 1:
3021             return this->pushIntrinsic(c.function().intrinsicKind(), *args[0]);
3022 
3023         case 2:
3024             return this->pushIntrinsic(c.function().intrinsicKind(), *args[0], *args[1]);
3025 
3026         case 3:
3027             return this->pushIntrinsic(c.function().intrinsicKind(), *args[0], *args[1], *args[2]);
3028 
3029         default:
3030             break;
3031     }
3032 
3033     return unsupported();
3034 }
3035 
pushLengthIntrinsic(int slotCount)3036 bool Generator::pushLengthIntrinsic(int slotCount) {
3037     if (slotCount == 1) {
3038         // `length(scalar)` is `sqrt(x^2)`, which is equivalent to `abs(x)`.
3039         return this->pushAbsFloatIntrinsic(/*slots=*/1);
3040     }
3041     // Implement `length(vec)` as `sqrt(dot(x, x))`.
3042     fBuilder.push_clone(slotCount);
3043     fBuilder.dot_floats(slotCount);
3044     fBuilder.unary_op(BuilderOp::sqrt_float, 1);
3045     return true;
3046 }
3047 
pushAbsFloatIntrinsic(int slots)3048 bool Generator::pushAbsFloatIntrinsic(int slots) {
3049     // Perform abs(float) by masking off the sign bit.
3050     fBuilder.push_constant_u(0x7FFFFFFF, slots);
3051     fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, slots);
3052     return true;
3053 }
3054 
pushVectorizedExpression(const Expression & expr,const Type & vectorType)3055 bool Generator::pushVectorizedExpression(const Expression& expr, const Type& vectorType) {
3056     if (!this->pushExpression(expr)) {
3057         return unsupported();
3058     }
3059     if (vectorType.slotCount() > expr.type().slotCount()) {
3060         SkASSERT(expr.type().slotCount() == 1);
3061         fBuilder.push_duplicates(vectorType.slotCount() - expr.type().slotCount());
3062     }
3063     return true;
3064 }
3065 
pushIntrinsic(const TypedOps & ops,const Expression & arg0)3066 bool Generator::pushIntrinsic(const TypedOps& ops, const Expression& arg0) {
3067     if (!this->pushExpression(arg0)) {
3068         return unsupported();
3069     }
3070     return this->unaryOp(arg0.type(), ops);
3071 }
3072 
pushIntrinsic(BuilderOp builderOp,const Expression & arg0)3073 bool Generator::pushIntrinsic(BuilderOp builderOp, const Expression& arg0) {
3074     if (!this->pushExpression(arg0)) {
3075         return unsupported();
3076     }
3077     fBuilder.unary_op(builderOp, arg0.type().slotCount());
3078     return true;
3079 }
3080 
pushIntrinsic(IntrinsicKind intrinsic,const Expression & arg0)3081 bool Generator::pushIntrinsic(IntrinsicKind intrinsic, const Expression& arg0) {
3082     switch (intrinsic) {
3083         case IntrinsicKind::k_abs_IntrinsicKind:
3084             if (arg0.type().componentType().isFloat()) {
3085                 // Perform abs(float) by masking off the sign bit.
3086                 if (!this->pushExpression(arg0)) {
3087                     return unsupported();
3088                 }
3089                 return this->pushAbsFloatIntrinsic(arg0.type().slotCount());
3090             }
3091             // We have a dedicated op for abs(int).
3092             return this->pushIntrinsic(BuilderOp::abs_int, arg0);
3093 
3094         case IntrinsicKind::k_any_IntrinsicKind:
3095             if (!this->pushExpression(arg0)) {
3096                 return unsupported();
3097             }
3098             this->foldWithMultiOp(BuilderOp::bitwise_or_n_ints, arg0.type().slotCount());
3099             return true;
3100 
3101         case IntrinsicKind::k_all_IntrinsicKind:
3102             if (!this->pushExpression(arg0)) {
3103                 return unsupported();
3104             }
3105             this->foldWithMultiOp(BuilderOp::bitwise_and_n_ints, arg0.type().slotCount());
3106             return true;
3107 
3108         case IntrinsicKind::k_acos_IntrinsicKind:
3109             return this->pushIntrinsic(BuilderOp::acos_float, arg0);
3110 
3111         case IntrinsicKind::k_asin_IntrinsicKind:
3112             return this->pushIntrinsic(BuilderOp::asin_float, arg0);
3113 
3114         case IntrinsicKind::k_atan_IntrinsicKind:
3115             return this->pushIntrinsic(BuilderOp::atan_float, arg0);
3116 
3117         case IntrinsicKind::k_ceil_IntrinsicKind:
3118             return this->pushIntrinsic(BuilderOp::ceil_float, arg0);
3119 
3120         case IntrinsicKind::k_cos_IntrinsicKind:
3121             return this->pushIntrinsic(BuilderOp::cos_float, arg0);
3122 
3123         case IntrinsicKind::k_degrees_IntrinsicKind: {
3124             Literal lit180OverPi{Position{}, 57.2957795131f, &arg0.type().componentType()};
3125             return this->pushBinaryExpression(arg0, OperatorKind::STAR, lit180OverPi);
3126         }
3127         case IntrinsicKind::k_floatBitsToInt_IntrinsicKind:
3128         case IntrinsicKind::k_floatBitsToUint_IntrinsicKind:
3129         case IntrinsicKind::k_intBitsToFloat_IntrinsicKind:
3130         case IntrinsicKind::k_uintBitsToFloat_IntrinsicKind:
3131             return this->pushExpression(arg0);
3132 
3133         case IntrinsicKind::k_exp_IntrinsicKind:
3134             return this->pushIntrinsic(BuilderOp::exp_float, arg0);
3135 
3136         case IntrinsicKind::k_exp2_IntrinsicKind:
3137             return this->pushIntrinsic(BuilderOp::exp2_float, arg0);
3138 
3139         case IntrinsicKind::k_floor_IntrinsicKind:
3140             return this->pushIntrinsic(BuilderOp::floor_float, arg0);
3141 
3142         case IntrinsicKind::k_fract_IntrinsicKind:
3143             // Implement fract as `x - floor(x)`.
3144             if (!this->pushExpression(arg0)) {
3145                 return unsupported();
3146             }
3147             fBuilder.push_clone(arg0.type().slotCount());
3148             fBuilder.unary_op(BuilderOp::floor_float, arg0.type().slotCount());
3149             return this->binaryOp(arg0.type(), kSubtractOps);
3150 
3151         case IntrinsicKind::k_inverse_IntrinsicKind:
3152             SkASSERT(arg0.type().isMatrix());
3153             SkASSERT(arg0.type().rows() == arg0.type().columns());
3154             if (!this->pushExpression(arg0)) {
3155                 return unsupported();
3156             }
3157             fBuilder.inverse_matrix(arg0.type().rows());
3158             return true;
3159 
3160         case IntrinsicKind::k_inversesqrt_IntrinsicKind:
3161             return this->pushIntrinsic(kInverseSqrtOps, arg0);
3162 
3163         case IntrinsicKind::k_length_IntrinsicKind:
3164             return this->pushExpression(arg0) &&
3165                    this->pushLengthIntrinsic(arg0.type().slotCount());
3166 
3167         case IntrinsicKind::k_log_IntrinsicKind:
3168             if (!this->pushExpression(arg0)) {
3169                 return unsupported();
3170             }
3171             fBuilder.unary_op(BuilderOp::log_float, arg0.type().slotCount());
3172             return true;
3173 
3174         case IntrinsicKind::k_log2_IntrinsicKind:
3175             if (!this->pushExpression(arg0)) {
3176                 return unsupported();
3177             }
3178             fBuilder.unary_op(BuilderOp::log2_float, arg0.type().slotCount());
3179             return true;
3180 
3181         case IntrinsicKind::k_normalize_IntrinsicKind: {
3182             // Implement normalize as `x / length(x)`. First, push the expression.
3183             if (!this->pushExpression(arg0)) {
3184                 return unsupported();
3185             }
3186             int slotCount = arg0.type().slotCount();
3187             if (slotCount > 1) {
3188 #if defined(SK_USE_RSQRT_IN_RP_NORMALIZE)
3189                 // Instead of `x / sqrt(dot(x, x))`, we can get roughly the same result in less time
3190                 // by computing `x * invsqrt(dot(x, x))`.
3191                 fBuilder.push_clone(slotCount);
3192                 fBuilder.push_clone(slotCount);
3193                 fBuilder.dot_floats(slotCount);
3194 
3195                 // Compute `vec(inversesqrt(dot(x, x)))`.
3196                 fBuilder.unary_op(BuilderOp::invsqrt_float, 1);
3197                 fBuilder.push_duplicates(slotCount - 1);
3198 
3199                 // Return `x * vec(inversesqrt(dot(x, x)))`.
3200                 return this->binaryOp(arg0.type(), kMultiplyOps);
3201 #else
3202                 // TODO: We can get roughly the same result in less time by using `invsqrt`, but
3203                 // that leads to more variance across architectures, which Chromium layout tests do
3204                 // not handle nicely.
3205                 fBuilder.push_clone(slotCount);
3206                 fBuilder.push_clone(slotCount);
3207                 fBuilder.dot_floats(slotCount);
3208 
3209                 // Compute `vec(sqrt(dot(x, x)))`.
3210                 fBuilder.unary_op(BuilderOp::sqrt_float, 1);
3211                 fBuilder.push_duplicates(slotCount - 1);
3212 
3213                 // Return `x / vec(sqrt(dot(x, x)))`.
3214                 return this->binaryOp(arg0.type(), kDivideOps);
3215 #endif
3216             } else {
3217                 // For single-slot normalization, we can simplify `sqrt(x * x)` into `abs(x)`.
3218                 fBuilder.push_clone(slotCount);
3219                 return this->pushAbsFloatIntrinsic(/*slots=*/1) &&
3220                        this->binaryOp(arg0.type(), kDivideOps);
3221             }
3222         }
3223         case IntrinsicKind::k_not_IntrinsicKind:
3224             return this->pushPrefixExpression(OperatorKind::LOGICALNOT, arg0);
3225 
3226         case IntrinsicKind::k_radians_IntrinsicKind: {
3227             Literal litPiOver180{Position{}, 0.01745329251f, &arg0.type().componentType()};
3228             return this->pushBinaryExpression(arg0, OperatorKind::STAR, litPiOver180);
3229         }
3230         case IntrinsicKind::k_saturate_IntrinsicKind: {
3231             // Implement saturate as clamp(arg, 0, 1).
3232             Literal zeroLiteral{Position{}, 0.0, &arg0.type().componentType()};
3233             Literal oneLiteral{Position{}, 1.0, &arg0.type().componentType()};
3234             return this->pushIntrinsic(k_clamp_IntrinsicKind, arg0, zeroLiteral, oneLiteral);
3235         }
3236         case IntrinsicKind::k_sign_IntrinsicKind: {
3237             // Implement floating-point sign() as `clamp(arg * FLT_MAX, -1, 1)`.
3238             // FLT_MIN * FLT_MAX evaluates to 4, so multiplying any float value against FLT_MAX is
3239             // sufficient to ensure that |value| is always 1 or greater (excluding zero and nan).
3240             // Integer sign() doesn't need to worry about fractional values or nans, and can simply
3241             // be `clamp(arg, -1, 1)`.
3242             if (!this->pushExpression(arg0)) {
3243                 return unsupported();
3244             }
3245             if (arg0.type().componentType().isFloat()) {
3246                 Literal fltMaxLiteral{Position{}, FLT_MAX, &arg0.type().componentType()};
3247                 if (!this->pushVectorizedExpression(fltMaxLiteral, arg0.type())) {
3248                     return unsupported();
3249                 }
3250                 if (!this->binaryOp(arg0.type(), kMultiplyOps)) {
3251                     return unsupported();
3252                 }
3253             }
3254             Literal neg1Literal{Position{}, -1.0, &arg0.type().componentType()};
3255             if (!this->pushVectorizedExpression(neg1Literal, arg0.type())) {
3256                 return unsupported();
3257             }
3258             if (!this->binaryOp(arg0.type(), kMaxOps)) {
3259                 return unsupported();
3260             }
3261             Literal pos1Literal{Position{}, 1.0, &arg0.type().componentType()};
3262             if (!this->pushVectorizedExpression(pos1Literal, arg0.type())) {
3263                 return unsupported();
3264             }
3265             return this->binaryOp(arg0.type(), kMinOps);
3266         }
3267         case IntrinsicKind::k_sin_IntrinsicKind:
3268             return this->pushIntrinsic(BuilderOp::sin_float, arg0);
3269 
3270         case IntrinsicKind::k_sqrt_IntrinsicKind:
3271             return this->pushIntrinsic(BuilderOp::sqrt_float, arg0);
3272 
3273         case IntrinsicKind::k_tan_IntrinsicKind:
3274             return this->pushIntrinsic(BuilderOp::tan_float, arg0);
3275 
3276         case IntrinsicKind::k_transpose_IntrinsicKind:
3277             SkASSERT(arg0.type().isMatrix());
3278             if (!this->pushExpression(arg0)) {
3279                 return unsupported();
3280             }
3281             fBuilder.transpose(arg0.type().columns(), arg0.type().rows());
3282             return true;
3283 
3284         case IntrinsicKind::k_trunc_IntrinsicKind:
3285             // Implement trunc as `float(int(x))`, since float-to-int rounds toward zero.
3286             if (!this->pushExpression(arg0)) {
3287                 return unsupported();
3288             }
3289             fBuilder.unary_op(BuilderOp::cast_to_int_from_float, arg0.type().slotCount());
3290             fBuilder.unary_op(BuilderOp::cast_to_float_from_int, arg0.type().slotCount());
3291             return true;
3292 
3293         case IntrinsicKind::k_fromLinearSrgb_IntrinsicKind:
3294         case IntrinsicKind::k_toLinearSrgb_IntrinsicKind:
3295             // The argument must be a half3.
3296             SkASSERT(arg0.type().matches(*fContext.fTypes.fHalf3));
3297             if (!this->pushExpression(arg0)) {
3298                 return unsupported();
3299             }
3300 
3301             if (intrinsic == IntrinsicKind::k_fromLinearSrgb_IntrinsicKind) {
3302                 fBuilder.invoke_from_linear_srgb();
3303             } else {
3304                 fBuilder.invoke_to_linear_srgb();
3305             }
3306             return true;
3307 
3308         default:
3309             break;
3310     }
3311     return unsupported();
3312 }
3313 
pushIntrinsic(const TypedOps & ops,const Expression & arg0,const Expression & arg1)3314 bool Generator::pushIntrinsic(const TypedOps& ops, const Expression& arg0, const Expression& arg1) {
3315     if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3316         return unsupported();
3317     }
3318     return this->binaryOp(arg0.type(), ops);
3319 }
3320 
pushIntrinsic(BuilderOp builderOp,const Expression & arg0,const Expression & arg1)3321 bool Generator::pushIntrinsic(BuilderOp builderOp, const Expression& arg0, const Expression& arg1) {
3322     if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3323         return unsupported();
3324     }
3325     fBuilder.binary_op(builderOp, arg0.type().slotCount());
3326     return true;
3327 }
3328 
pushIntrinsic(IntrinsicKind intrinsic,const Expression & arg0,const Expression & arg1)3329 bool Generator::pushIntrinsic(IntrinsicKind intrinsic,
3330                               const Expression& arg0,
3331                               const Expression& arg1) {
3332     switch (intrinsic) {
3333         case IntrinsicKind::k_atan_IntrinsicKind:
3334             return this->pushIntrinsic(BuilderOp::atan2_n_floats, arg0, arg1);
3335 
3336         case IntrinsicKind::k_cross_IntrinsicKind: {
3337             // Implement cross as `arg0.yzx * arg1.zxy - arg0.zxy * arg1.yzx`. We use two stacks so
3338             // that each subexpression can be multiplied separately.
3339             SkASSERT(arg0.type().matches(arg1.type()));
3340             SkASSERT(arg0.type().slotCount() == 3);
3341             SkASSERT(arg1.type().slotCount() == 3);
3342 
3343             // Push `arg0.yzx` onto this stack and `arg0.zxy` onto a separate subexpression stack.
3344             AutoStack subexpressionStack(this);
3345             subexpressionStack.enter();
3346             if (!this->pushExpression(arg0)) {
3347                 return unsupported();
3348             }
3349             subexpressionStack.exit();
3350             subexpressionStack.pushClone(/*slots=*/3);
3351 
3352             fBuilder.swizzle(/*consumedSlots=*/3, {1, 2, 0});
3353             subexpressionStack.enter();
3354             fBuilder.swizzle(/*consumedSlots=*/3, {2, 0, 1});
3355             subexpressionStack.exit();
3356 
3357             // Push `arg1.zxy` onto this stack and `arg1.yzx` onto the next stack. Perform the
3358             // multiply on each subexpression (`arg0.yzx * arg1.zxy` on the first stack, and
3359             // `arg0.zxy * arg1.yzx` on the next).
3360             subexpressionStack.enter();
3361             if (!this->pushExpression(arg1)) {
3362                 return unsupported();
3363             }
3364             subexpressionStack.exit();
3365             subexpressionStack.pushClone(/*slots=*/3);
3366 
3367             fBuilder.swizzle(/*consumedSlots=*/3, {2, 0, 1});
3368             fBuilder.binary_op(BuilderOp::mul_n_floats, 3);
3369 
3370             subexpressionStack.enter();
3371             fBuilder.swizzle(/*consumedSlots=*/3, {1, 2, 0});
3372             fBuilder.binary_op(BuilderOp::mul_n_floats, 3);
3373             subexpressionStack.exit();
3374 
3375             // Migrate the result of the second subexpression (`arg0.zxy * arg1.yzx`) back onto the
3376             // main stack and subtract it from the first subexpression (`arg0.yzx * arg1.zxy`).
3377             subexpressionStack.pushClone(/*slots=*/3);
3378             fBuilder.binary_op(BuilderOp::sub_n_floats, 3);
3379 
3380             // Now that the calculation is complete, discard the subexpression on the next stack.
3381             subexpressionStack.enter();
3382             this->discardExpression(/*slots=*/3);
3383             subexpressionStack.exit();
3384             return true;
3385         }
3386         case IntrinsicKind::k_distance_IntrinsicKind:
3387             // Implement distance as `length(a - b)`.
3388             SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
3389             return this->pushBinaryExpression(arg0, OperatorKind::MINUS, arg1) &&
3390                    this->pushLengthIntrinsic(arg0.type().slotCount());
3391 
3392         case IntrinsicKind::k_dot_IntrinsicKind:
3393             SkASSERT(arg0.type().matches(arg1.type()));
3394             if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3395                 return unsupported();
3396             }
3397             fBuilder.dot_floats(arg0.type().slotCount());
3398             return true;
3399 
3400         case IntrinsicKind::k_equal_IntrinsicKind:
3401             SkASSERT(arg0.type().matches(arg1.type()));
3402             return this->pushIntrinsic(kEqualOps, arg0, arg1);
3403 
3404         case IntrinsicKind::k_notEqual_IntrinsicKind:
3405             SkASSERT(arg0.type().matches(arg1.type()));
3406             return this->pushIntrinsic(kNotEqualOps, arg0, arg1);
3407 
3408         case IntrinsicKind::k_lessThan_IntrinsicKind:
3409             SkASSERT(arg0.type().matches(arg1.type()));
3410             return this->pushIntrinsic(kLessThanOps, arg0, arg1);
3411 
3412         case IntrinsicKind::k_greaterThan_IntrinsicKind:
3413             SkASSERT(arg0.type().matches(arg1.type()));
3414             return this->pushIntrinsic(kLessThanOps, arg1, arg0);
3415 
3416         case IntrinsicKind::k_lessThanEqual_IntrinsicKind:
3417             SkASSERT(arg0.type().matches(arg1.type()));
3418             return this->pushIntrinsic(kLessThanEqualOps, arg0, arg1);
3419 
3420         case IntrinsicKind::k_greaterThanEqual_IntrinsicKind:
3421             SkASSERT(arg0.type().matches(arg1.type()));
3422             return this->pushIntrinsic(kLessThanEqualOps, arg1, arg0);
3423 
3424         case IntrinsicKind::k_min_IntrinsicKind:
3425             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3426             return this->pushIntrinsic(kMinOps, arg0, arg1);
3427 
3428         case IntrinsicKind::k_matrixCompMult_IntrinsicKind:
3429             SkASSERT(arg0.type().matches(arg1.type()));
3430             return this->pushIntrinsic(kMultiplyOps, arg0, arg1);
3431 
3432         case IntrinsicKind::k_max_IntrinsicKind:
3433             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3434             return this->pushIntrinsic(kMaxOps, arg0, arg1);
3435 
3436         case IntrinsicKind::k_mod_IntrinsicKind:
3437             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3438             return this->pushIntrinsic(kModOps, arg0, arg1);
3439 
3440         case IntrinsicKind::k_pow_IntrinsicKind:
3441             SkASSERT(arg0.type().matches(arg1.type()));
3442             return this->pushIntrinsic(BuilderOp::pow_n_floats, arg0, arg1);
3443 
3444         case IntrinsicKind::k_reflect_IntrinsicKind: {
3445             // Implement reflect as `I - (N * dot(I,N) * 2)`.
3446             SkASSERT(arg0.type().matches(arg1.type()));
3447             SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
3448             SkASSERT(arg0.type().componentType().isFloat());
3449             int slotCount = arg0.type().slotCount();
3450 
3451             // Stack: I, N.
3452             if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3453                 return unsupported();
3454             }
3455             // Stack: I, N, I, N.
3456             fBuilder.push_clone(2 * slotCount);
3457             // Stack: I, N, dot(I,N)
3458             fBuilder.dot_floats(slotCount);
3459             // Stack: I, N, dot(I,N), 2
3460             fBuilder.push_constant_f(2.0);
3461             // Stack: I, N, dot(I,N) * 2
3462             fBuilder.binary_op(BuilderOp::mul_n_floats, 1);
3463             // Stack: I, N * dot(I,N) * 2
3464             fBuilder.push_duplicates(slotCount - 1);
3465             fBuilder.binary_op(BuilderOp::mul_n_floats, slotCount);
3466             // Stack: I - (N * dot(I,N) * 2)
3467             fBuilder.binary_op(BuilderOp::sub_n_floats, slotCount);
3468             return true;
3469         }
3470         case IntrinsicKind::k_step_IntrinsicKind: {
3471             // Compute step as `float(lessThanEqual(edge, x))`. We convert from boolean 0/~0 to
3472             // floating point zero/one by using a bitwise-and against the bit-pattern of 1.0.
3473             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3474             if (!this->pushVectorizedExpression(arg0, arg1.type()) || !this->pushExpression(arg1)) {
3475                 return unsupported();
3476             }
3477             if (!this->binaryOp(arg1.type(), kLessThanEqualOps)) {
3478                 return unsupported();
3479             }
3480             Literal pos1Literal{Position{}, 1.0, &arg1.type().componentType()};
3481             if (!this->pushVectorizedExpression(pos1Literal, arg1.type())) {
3482                 return unsupported();
3483             }
3484             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, arg1.type().slotCount());
3485             return true;
3486         }
3487 
3488         default:
3489             break;
3490     }
3491     return unsupported();
3492 }
3493 
pushIntrinsic(IntrinsicKind intrinsic,const Expression & arg0,const Expression & arg1,const Expression & arg2)3494 bool Generator::pushIntrinsic(IntrinsicKind intrinsic,
3495                               const Expression& arg0,
3496                               const Expression& arg1,
3497                               const Expression& arg2) {
3498     switch (intrinsic) {
3499         case IntrinsicKind::k_clamp_IntrinsicKind:
3500             // Implement clamp as min(max(arg, low), high).
3501             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3502             SkASSERT(arg0.type().componentType().matches(arg2.type().componentType()));
3503             if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3504                 return unsupported();
3505             }
3506             if (!this->binaryOp(arg0.type(), kMaxOps)) {
3507                 return unsupported();
3508             }
3509             if (!this->pushVectorizedExpression(arg2, arg0.type())) {
3510                 return unsupported();
3511             }
3512             if (!this->binaryOp(arg0.type(), kMinOps)) {
3513                 return unsupported();
3514             }
3515             return true;
3516 
3517         case IntrinsicKind::k_faceforward_IntrinsicKind: {
3518             // Implement faceforward as `N ^ ((0 <= dot(I, NRef)) & 0x80000000)`.
3519             // In other words, flip the sign bit of N if `0 <= dot(I, NRef)`.
3520             SkASSERT(arg0.type().matches(arg1.type()));
3521             SkASSERT(arg0.type().matches(arg2.type()));
3522             int slotCount = arg0.type().slotCount();
3523 
3524             // Stack: N, 0, I, Nref
3525             if (!this->pushExpression(arg0)) {
3526                 return unsupported();
3527             }
3528             fBuilder.push_constant_f(0.0);
3529             if (!this->pushExpression(arg1) || !this->pushExpression(arg2)) {
3530                 return unsupported();
3531             }
3532             // Stack: N, 0, dot(I,NRef)
3533             fBuilder.dot_floats(slotCount);
3534             // Stack: N, (0 <= dot(I,NRef))
3535             fBuilder.binary_op(BuilderOp::cmple_n_floats, 1);
3536             // Stack: N, (0 <= dot(I,NRef)), 0x80000000
3537             fBuilder.push_constant_u(0x80000000);
3538             // Stack: N, (0 <= dot(I,NRef)) & 0x80000000)
3539             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
3540             // Stack: N, vec(0 <= dot(I,NRef)) & 0x80000000)
3541             fBuilder.push_duplicates(slotCount - 1);
3542             // Stack: N ^ vec((0 <= dot(I,NRef)) & 0x80000000)
3543             fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, slotCount);
3544             return true;
3545         }
3546         case IntrinsicKind::k_mix_IntrinsicKind:
3547             // Note: our SkRP mix op takes the interpolation point first, not the interpolants.
3548             SkASSERT(arg0.type().matches(arg1.type()));
3549             if (arg2.type().componentType().isFloat()) {
3550                 SkASSERT(arg0.type().componentType().matches(arg2.type().componentType()));
3551                 if (!this->pushVectorizedExpression(arg2, arg0.type())) {
3552                     return unsupported();
3553                 }
3554                 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3555                     return unsupported();
3556                 }
3557                 return this->ternaryOp(arg0.type(), kMixOps);
3558             }
3559             if (arg2.type().componentType().isBoolean()) {
3560                 if (!this->pushExpression(arg2)) {
3561                     return unsupported();
3562                 }
3563                 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3564                     return unsupported();
3565                 }
3566                 // The `mix_int` op isn't doing a lerp; it uses the third argument to select values
3567                 // from the first and second arguments. It's safe for use with any type in arguments
3568                 // 0 and 1.
3569                 fBuilder.ternary_op(BuilderOp::mix_n_ints, arg0.type().slotCount());
3570                 return true;
3571             }
3572             return unsupported();
3573 
3574         case IntrinsicKind::k_refract_IntrinsicKind: {
3575             // We always calculate refraction using vec4s, so we pad out unused N/I slots with zero.
3576             int padding = 4 - arg0.type().slotCount();
3577             if (!this->pushExpression(arg0)) {
3578                 return unsupported();
3579             }
3580             fBuilder.push_zeros(padding);
3581 
3582             if (!this->pushExpression(arg1)) {
3583                 return unsupported();
3584             }
3585             fBuilder.push_zeros(padding);
3586 
3587             // eta is always a scalar and doesn't need padding.
3588             if (!this->pushExpression(arg2)) {
3589                 return unsupported();
3590             }
3591             fBuilder.refract_floats();
3592 
3593             // The result vector was returned as a vec4, so discard the extra columns.
3594             fBuilder.discard_stack(padding);
3595             return true;
3596         }
3597         case IntrinsicKind::k_smoothstep_IntrinsicKind:
3598             SkASSERT(arg0.type().componentType().isFloat());
3599             SkASSERT(arg1.type().matches(arg0.type()));
3600             SkASSERT(arg2.type().componentType().isFloat());
3601 
3602             if (!this->pushVectorizedExpression(arg0, arg2.type()) ||
3603                 !this->pushVectorizedExpression(arg1, arg2.type()) ||
3604                 !this->pushExpression(arg2)) {
3605                 return unsupported();
3606             }
3607             fBuilder.ternary_op(BuilderOp::smoothstep_n_floats, arg2.type().slotCount());
3608             return true;
3609 
3610         default:
3611             break;
3612     }
3613     return unsupported();
3614 }
3615 
pushLiteral(const Literal & l)3616 bool Generator::pushLiteral(const Literal& l) {
3617     switch (l.type().numberKind()) {
3618         case Type::NumberKind::kFloat:
3619             fBuilder.push_constant_f(l.floatValue());
3620             return true;
3621 
3622         case Type::NumberKind::kSigned:
3623             fBuilder.push_constant_i(l.intValue());
3624             return true;
3625 
3626         case Type::NumberKind::kUnsigned:
3627             fBuilder.push_constant_u(l.intValue());
3628             return true;
3629 
3630         case Type::NumberKind::kBoolean:
3631             fBuilder.push_constant_i(l.boolValue() ? ~0 : 0);
3632             return true;
3633 
3634         default:
3635             SkUNREACHABLE;
3636     }
3637 }
3638 
pushPostfixExpression(const PostfixExpression & p,bool usesResult)3639 bool Generator::pushPostfixExpression(const PostfixExpression& p, bool usesResult) {
3640     // If the result is ignored...
3641     if (!usesResult) {
3642         // ... just emit a prefix expression instead.
3643         return this->pushPrefixExpression(p.getOperator(), *p.operand());
3644     }
3645     // Get the operand as an lvalue, and push it onto the stack as-is.
3646     std::unique_ptr<LValue> lvalue = this->makeLValue(*p.operand());
3647     if (!lvalue || !this->push(*lvalue)) {
3648         return unsupported();
3649     }
3650 
3651     // Push a scratch copy of the operand.
3652     fBuilder.push_clone(p.type().slotCount());
3653 
3654     // Increment or decrement the scratch copy by one.
3655     Literal oneLiteral{Position{}, 1.0, &p.type().componentType()};
3656     if (!this->pushVectorizedExpression(oneLiteral, p.type())) {
3657         return unsupported();
3658     }
3659 
3660     switch (p.getOperator().kind()) {
3661         case OperatorKind::PLUSPLUS:
3662             if (!this->binaryOp(p.type(), kAddOps)) {
3663                 return unsupported();
3664             }
3665             break;
3666 
3667         case OperatorKind::MINUSMINUS:
3668             if (!this->binaryOp(p.type(), kSubtractOps)) {
3669                 return unsupported();
3670             }
3671             break;
3672 
3673         default:
3674             SkUNREACHABLE;
3675     }
3676 
3677     // Write the new value back to the operand.
3678     if (!this->store(*lvalue)) {
3679         return unsupported();
3680     }
3681 
3682     // Discard the scratch copy, leaving only the original value as-is.
3683     this->discardExpression(p.type().slotCount());
3684     return true;
3685 }
3686 
pushPrefixExpression(const PrefixExpression & p)3687 bool Generator::pushPrefixExpression(const PrefixExpression& p) {
3688     return this->pushPrefixExpression(p.getOperator(), *p.operand());
3689 }
3690 
pushPrefixExpression(Operator op,const Expression & expr)3691 bool Generator::pushPrefixExpression(Operator op, const Expression& expr) {
3692     switch (op.kind()) {
3693         case OperatorKind::BITWISENOT:
3694         case OperatorKind::LOGICALNOT:
3695             // Handle operators ! and ~.
3696             if (!this->pushExpression(expr)) {
3697                 return unsupported();
3698             }
3699             fBuilder.push_constant_u(~0, expr.type().slotCount());
3700             fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
3701             return true;
3702 
3703         case OperatorKind::MINUS: {
3704             if (!this->pushExpression(expr)) {
3705                 return unsupported();
3706             }
3707             if (expr.type().componentType().isFloat()) {
3708                 // Handle float negation as an integer `x ^ 0x80000000`. This toggles the sign bit.
3709                 fBuilder.push_constant_u(0x80000000, expr.type().slotCount());
3710                 fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
3711             } else {
3712                 // Handle integer negation as a componentwise `expr * -1`.
3713                 fBuilder.push_constant_i(-1, expr.type().slotCount());
3714                 fBuilder.binary_op(BuilderOp::mul_n_ints, expr.type().slotCount());
3715             }
3716             return true;
3717         }
3718         case OperatorKind::PLUSPLUS: {
3719             // Rewrite as `expr += 1`.
3720             Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()};
3721             return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, oneLiteral);
3722         }
3723         case OperatorKind::MINUSMINUS: {
3724             // Rewrite as `expr += -1`.
3725             Literal minusOneLiteral{expr.fPosition, -1.0, &expr.type().componentType()};
3726             return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, minusOneLiteral);
3727         }
3728         default:
3729             break;
3730     }
3731 
3732     return unsupported();
3733 }
3734 
pushSwizzle(const Swizzle & s)3735 bool Generator::pushSwizzle(const Swizzle& s) {
3736     SkASSERT(!s.components().empty() && s.components().size() <= 4);
3737 
3738     // If this is a simple subset of a variable's slots...
3739     bool isSimpleSubset = is_sliceable_swizzle(s.components());
3740     if (isSimpleSubset && s.base()->is<VariableReference>()) {
3741         // ... we can just push part of the variable directly onto the stack, rather than pushing
3742         // the whole expression and then immediately cutting it down. (Either way works, but this
3743         // saves a step.)
3744         return this->pushVariableReferencePartial(
3745                 s.base()->as<VariableReference>(),
3746                 SlotRange{/*index=*/s.components()[0], /*count=*/s.components().size()});
3747     }
3748     // Push the base expression.
3749     if (!this->pushExpression(*s.base())) {
3750         return false;
3751     }
3752     // An identity swizzle doesn't rearrange the data; it just (potentially) discards tail elements.
3753     if (isSimpleSubset && s.components()[0] == 0) {
3754         int discardedElements = s.base()->type().slotCount() - s.components().size();
3755         SkASSERT(discardedElements >= 0);
3756         fBuilder.discard_stack(discardedElements);
3757         return true;
3758     }
3759     // Perform the swizzle.
3760     fBuilder.swizzle(s.base()->type().slotCount(), s.components());
3761     return true;
3762 }
3763 
pushTernaryExpression(const TernaryExpression & t)3764 bool Generator::pushTernaryExpression(const TernaryExpression& t) {
3765     return this->pushTernaryExpression(*t.test(), *t.ifTrue(), *t.ifFalse());
3766 }
3767 
pushDynamicallyUniformTernaryExpression(const Expression & test,const Expression & ifTrue,const Expression & ifFalse)3768 bool Generator::pushDynamicallyUniformTernaryExpression(const Expression& test,
3769                                                         const Expression& ifTrue,
3770                                                         const Expression& ifFalse) {
3771     SkASSERT(Analysis::IsDynamicallyUniformExpression(test));
3772 
3773     int falseLabelID = fBuilder.nextLabelID();
3774     int exitLabelID = fBuilder.nextLabelID();
3775 
3776     // First, push the test-expression into a separate stack.
3777     AutoStack testStack(this);
3778     testStack.enter();
3779     if (!this->pushExpression(test)) {
3780         return unsupported();
3781     }
3782 
3783     // Branch to the true- or false-expression based on the test-expression. We can skip the
3784     // non-true path entirely since the test is known to be uniform.
3785     fBuilder.branch_if_no_active_lanes_on_stack_top_equal(~0, falseLabelID);
3786     testStack.exit();
3787 
3788     if (!this->pushExpression(ifTrue)) {
3789         return unsupported();
3790     }
3791 
3792     fBuilder.jump(exitLabelID);
3793 
3794     // The builder doesn't understand control flow, and assumes that every push moves the stack-top
3795     // forwards. We need to manually balance out the `pushExpression` from the if-true path by
3796     // moving the stack position backwards, so that the if-false path pushes its expression into the
3797     // same as the if-true result.
3798     this->discardExpression(/*slots=*/ifTrue.type().slotCount());
3799 
3800     fBuilder.label(falseLabelID);
3801 
3802     if (!this->pushExpression(ifFalse)) {
3803         return unsupported();
3804     }
3805 
3806     fBuilder.label(exitLabelID);
3807 
3808     // Jettison the text-expression from the separate stack.
3809     testStack.enter();
3810     this->discardExpression(/*slots=*/1);
3811     testStack.exit();
3812     return true;
3813 }
3814 
pushTernaryExpression(const Expression & test,const Expression & ifTrue,const Expression & ifFalse)3815 bool Generator::pushTernaryExpression(const Expression& test,
3816                                       const Expression& ifTrue,
3817                                       const Expression& ifFalse) {
3818     // If the test-expression is dynamically-uniform, we can skip over the non-true expressions
3819     // entirely, and not need to involve the condition mask.
3820     if (Analysis::IsDynamicallyUniformExpression(test)) {
3821         return this->pushDynamicallyUniformTernaryExpression(test, ifTrue, ifFalse);
3822     }
3823 
3824     // Analyze the ternary to see which corners we can safely cut.
3825     bool ifFalseHasSideEffects = Analysis::HasSideEffects(ifFalse);
3826     bool ifTrueHasSideEffects  = Analysis::HasSideEffects(ifTrue);
3827     bool ifTrueIsTrivial       = Analysis::IsTrivialExpression(ifTrue);
3828     int  cleanupLabelID        = fBuilder.nextLabelID();
3829 
3830     // If the true- and false-expressions both lack side effects, we evaluate both of them safely
3831     // without masking off their effects. In that case, we can emit both sides and use boolean mix
3832     // to select the correct result without using the condition mask at all.
3833     if (!ifFalseHasSideEffects && !ifTrueHasSideEffects && ifTrueIsTrivial) {
3834         // Push all of the arguments to mix.
3835         if (!this->pushVectorizedExpression(test, ifTrue.type())) {
3836             return unsupported();
3837         }
3838         if (!this->pushExpression(ifFalse)) {
3839             return unsupported();
3840         }
3841         if (!this->pushExpression(ifTrue)) {
3842             return unsupported();
3843         }
3844         // Use boolean mix to select the true- or false-expression via the test-expression.
3845         fBuilder.ternary_op(BuilderOp::mix_n_ints, ifTrue.type().slotCount());
3846         return true;
3847     }
3848 
3849     // First, push the current condition-mask and the test-expression into a separate stack.
3850     fBuilder.enableExecutionMaskWrites();
3851     AutoStack testStack(this);
3852     testStack.enter();
3853     fBuilder.push_condition_mask();
3854     if (!this->pushExpression(test)) {
3855         return unsupported();
3856     }
3857     testStack.exit();
3858 
3859     // We can take some shortcuts with condition-mask handling if the false-expression is entirely
3860     // side-effect free. (We can evaluate it without masking off its effects.) We always handle the
3861     // condition mask properly for the test-expression and true-expression properly.
3862     if (!ifFalseHasSideEffects) {
3863         // Push the false-expression onto the primary stack.
3864         if (!this->pushExpression(ifFalse)) {
3865             return unsupported();
3866         }
3867 
3868         // Next, merge the condition mask (on the separate stack) with the test expression.
3869         testStack.enter();
3870         fBuilder.merge_condition_mask();
3871         testStack.exit();
3872 
3873         // If no lanes are active, we can skip the true-expression entirely. This isn't super likely
3874         // to happen, so it's probably only a win for non-trivial true-expressions.
3875         if (!ifTrueIsTrivial) {
3876             fBuilder.branch_if_no_lanes_active(cleanupLabelID);
3877         }
3878 
3879         // Push the true-expression onto the primary stack, immediately after the false-expression.
3880         if (!this->pushExpression(ifTrue)) {
3881             return unsupported();
3882         }
3883 
3884         // Use a select to conditionally mask-merge the true-expression and false-expression lanes.
3885         fBuilder.select(/*slots=*/ifTrue.type().slotCount());
3886         fBuilder.label(cleanupLabelID);
3887     } else {
3888         // Merge the condition mask (on the separate stack) with the test expression.
3889         testStack.enter();
3890         fBuilder.merge_condition_mask();
3891         testStack.exit();
3892 
3893         // Push the true-expression onto the primary stack.
3894         if (!this->pushExpression(ifTrue)) {
3895             return unsupported();
3896         }
3897 
3898         // Switch back to the test-expression stack and apply the inverted test condition.
3899         testStack.enter();
3900         fBuilder.merge_inv_condition_mask();
3901         testStack.exit();
3902 
3903         // Push the false-expression onto the primary stack, immediately after the true-expression.
3904         if (!this->pushExpression(ifFalse)) {
3905             return unsupported();
3906         }
3907 
3908         // Use a select to conditionally mask-merge the true-expression and false-expression lanes;
3909         // the mask is already set up for this.
3910         fBuilder.select(/*slots=*/ifTrue.type().slotCount());
3911     }
3912 
3913     // Restore the condition-mask to its original state and jettison the test-expression.
3914     testStack.enter();
3915     this->discardExpression(/*slots=*/1);
3916     fBuilder.pop_condition_mask();
3917     testStack.exit();
3918 
3919     fBuilder.disableExecutionMaskWrites();
3920     return true;
3921 }
3922 
pushVariableReference(const VariableReference & var)3923 bool Generator::pushVariableReference(const VariableReference& var) {
3924     // If we are pushing a constant-value variable, push the value directly; literal values are more
3925     // amenable to optimization.
3926     if (var.type().isScalar() || var.type().isVector()) {
3927         if (const Expression* expr = ConstantFolder::GetConstantValueOrNull(var)) {
3928             return this->pushExpression(*expr);
3929         }
3930         if (fImmutableVariables.contains(var.variable())) {
3931             return this->pushExpression(*var.variable()->initialValue());
3932         }
3933     }
3934     return this->pushVariableReferencePartial(var, SlotRange{0, (int)var.type().slotCount()});
3935 }
3936 
pushVariableReferencePartial(const VariableReference & v,SlotRange subset)3937 bool Generator::pushVariableReferencePartial(const VariableReference& v, SlotRange subset) {
3938     const Variable& var = *v.variable();
3939     SlotRange r;
3940     if (IsUniform(var)) {
3941         // Push a uniform.
3942         r = this->getUniformSlots(var);
3943         SkASSERT(r.count == (int)var.type().slotCount());
3944         r.index += subset.index;
3945         r.count = subset.count;
3946         fBuilder.push_uniform(r);
3947     } else if (fImmutableVariables.contains(&var)) {
3948         // If we only need a single slot, we can push a constant. This saves a lookup, and can
3949         // occasionally permit the use of an immediate-mode op.
3950         if (subset.count == 1) {
3951             const Expression& expr = *v.variable()->initialValue();
3952             std::optional<ImmutableBits> bits = this->getImmutableBitsForSlot(expr, subset.index);
3953             if (bits.has_value()) {
3954                 fBuilder.push_constant_i(*bits);
3955                 return true;
3956             }
3957         }
3958         // Push the immutable slot range.
3959         r = this->getImmutableSlots(var);
3960         SkASSERT(r.count == (int)var.type().slotCount());
3961         r.index += subset.index;
3962         r.count = subset.count;
3963         fBuilder.push_immutable(r);
3964     } else {
3965         // Push the variable.
3966         r = this->getVariableSlots(var);
3967         SkASSERT(r.count == (int)var.type().slotCount());
3968         r.index += subset.index;
3969         r.count = subset.count;
3970         fBuilder.push_slots(r);
3971     }
3972     return true;
3973 }
3974 
writeProgram(const FunctionDefinition & function)3975 bool Generator::writeProgram(const FunctionDefinition& function) {
3976     fCurrentFunction = &function;
3977 
3978     if (fDebugTrace) {
3979         // Copy the program source into the debug info so that it will be written in the trace file.
3980         fDebugTrace->setSource(*fProgram.fSource);
3981 
3982         if (fWriteTraceOps) {
3983             // The Raster Pipeline blitter generates centered pixel coordinates. (0.5, 1.5, 2.5,
3984             // etc.) Add 0.5 to the requested trace coordinate to match this, then compare against
3985             // src.rg, which contains the shader's coordinates. We keep this result in a dedicated
3986             // trace-mask stack.
3987             fTraceMask.emplace(this);
3988             fTraceMask->enter();
3989             fBuilder.push_device_xy01();
3990             fBuilder.discard_stack(2);
3991             fBuilder.push_constant_f(fDebugTrace->fTraceCoord.fX + 0.5f);
3992             fBuilder.push_constant_f(fDebugTrace->fTraceCoord.fY + 0.5f);
3993             fBuilder.binary_op(BuilderOp::cmpeq_n_floats, 2);
3994             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
3995             fTraceMask->exit();
3996 
3997             // Assemble a position-to-line-number mapping for the debugger.
3998             this->calculateLineOffsets();
3999         }
4000     }
4001 
4002     // Assign slots to the parameters of main; copy src and dst into those slots as appropriate.
4003     const SkSL::Variable* mainCoordsParam = function.declaration().getMainCoordsParameter();
4004     const SkSL::Variable* mainInputColorParam = function.declaration().getMainInputColorParameter();
4005     const SkSL::Variable* mainDestColorParam = function.declaration().getMainDestColorParameter();
4006 
4007     for (const SkSL::Variable* param : function.declaration().parameters()) {
4008         if (param == mainCoordsParam) {
4009             // Coordinates are passed via RG.
4010             SlotRange fragCoord = this->getVariableSlots(*param);
4011             SkASSERT(fragCoord.count == 2);
4012             fBuilder.store_src_rg(fragCoord);
4013         } else if (param == mainInputColorParam) {
4014             // Input colors are passed via RGBA.
4015             SlotRange srcColor = this->getVariableSlots(*param);
4016             SkASSERT(srcColor.count == 4);
4017             fBuilder.store_src(srcColor);
4018         } else if (param == mainDestColorParam) {
4019             // Dest colors are passed via dRGBA.
4020             SlotRange destColor = this->getVariableSlots(*param);
4021             SkASSERT(destColor.count == 4);
4022             fBuilder.store_dst(destColor);
4023         } else {
4024             SkDEBUGFAIL("Invalid parameter to main()");
4025             return unsupported();
4026         }
4027     }
4028 
4029     // Initialize the program.
4030     fBuilder.init_lane_masks();
4031 
4032     // Emit global variables.
4033     if (!this->writeGlobals()) {
4034         return unsupported();
4035     }
4036 
4037     // Invoke main().
4038     std::optional<SlotRange> mainResult = this->writeFunction(function, function, /*arguments=*/{});
4039     if (!mainResult.has_value()) {
4040         return unsupported();
4041     }
4042 
4043     // Move the result of main() from slots into RGBA.
4044     SkASSERT(mainResult->count == 4);
4045     if (this->needsFunctionResultSlots(fCurrentFunction)) {
4046         fBuilder.load_src(*mainResult);
4047     } else {
4048         fBuilder.pop_src_rgba();
4049     }
4050 
4051     // Discard the trace mask.
4052     if (fTraceMask.has_value()) {
4053         fTraceMask->enter();
4054         fBuilder.discard_stack(1);
4055         fTraceMask->exit();
4056     }
4057 
4058     return true;
4059 }
4060 
finish()4061 std::unique_ptr<RP::Program> Generator::finish() {
4062     return fBuilder.finish(fProgramSlots.slotCount(),
4063                            fUniformSlots.slotCount(),
4064                            fImmutableSlots.slotCount(),
4065                            fDebugTrace);
4066 }
4067 
4068 }  // namespace RP
4069 
MakeRasterPipelineProgram(const SkSL::Program & program,const FunctionDefinition & function,DebugTracePriv * debugTrace,bool writeTraceOps)4070 std::unique_ptr<RP::Program> MakeRasterPipelineProgram(const SkSL::Program& program,
4071                                                        const FunctionDefinition& function,
4072                                                        DebugTracePriv* debugTrace,
4073                                                        bool writeTraceOps) {
4074     RP::Generator generator(program, debugTrace, writeTraceOps);
4075     if (!generator.writeProgram(function)) {
4076         return nullptr;
4077     }
4078     return generator.finish();
4079 }
4080 
4081 }  // namespace SkSL
4082