1 //
2 // Copyright 2020 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 #include "compiler/translator/msl/TranslatorMSL.h"
8
9 #include "angle_gl.h"
10 #include "common/utilities.h"
11 #include "compiler/translator/ImmutableStringBuilder.h"
12 #include "compiler/translator/StaticType.h"
13 #include "compiler/translator/msl/AstHelpers.h"
14 #include "compiler/translator/msl/DriverUniformMetal.h"
15 #include "compiler/translator/msl/EmitMetal.h"
16 #include "compiler/translator/msl/Name.h"
17 #include "compiler/translator/msl/RewritePipelines.h"
18 #include "compiler/translator/msl/SymbolEnv.h"
19 #include "compiler/translator/msl/ToposortStructs.h"
20 #include "compiler/translator/msl/UtilsMSL.h"
21 #include "compiler/translator/tree_ops/InitializeVariables.h"
22 #include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h"
23 #include "compiler/translator/tree_ops/PreTransformTextureCubeGradDerivatives.h"
24 #include "compiler/translator/tree_ops/RemoveAtomicCounterBuiltins.h"
25 #include "compiler/translator/tree_ops/RemoveInactiveInterfaceVariables.h"
26 #include "compiler/translator/tree_ops/RewriteArrayOfArrayOfOpaqueUniforms.h"
27 #include "compiler/translator/tree_ops/RewriteAtomicCounters.h"
28 #include "compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h"
29 #include "compiler/translator/tree_ops/RewriteDfdy.h"
30 #include "compiler/translator/tree_ops/RewriteStructSamplers.h"
31 #include "compiler/translator/tree_ops/SeparateStructFromUniformDeclarations.h"
32 #include "compiler/translator/tree_ops/msl/AddExplicitTypeCasts.h"
33 #include "compiler/translator/tree_ops/msl/ConvertUnsupportedConstructorsToFunctionCalls.h"
34 #include "compiler/translator/tree_ops/msl/FixTypeConstructors.h"
35 #include "compiler/translator/tree_ops/msl/HoistConstants.h"
36 #include "compiler/translator/tree_ops/msl/IntroduceVertexIndexID.h"
37 #include "compiler/translator/tree_ops/msl/NameEmbeddedUniformStructsMetal.h"
38 #include "compiler/translator/tree_ops/msl/ReduceInterfaceBlocks.h"
39 #include "compiler/translator/tree_ops/msl/RewriteCaseDeclarations.h"
40 #include "compiler/translator/tree_ops/msl/RewriteInterpolants.h"
41 #include "compiler/translator/tree_ops/msl/RewriteOutArgs.h"
42 #include "compiler/translator/tree_ops/msl/RewriteUnaddressableReferences.h"
43 #include "compiler/translator/tree_ops/msl/SeparateCompoundExpressions.h"
44 #include "compiler/translator/tree_ops/msl/SeparateCompoundStructDeclarations.h"
45 #include "compiler/translator/tree_ops/msl/WrapMain.h"
46 #include "compiler/translator/tree_util/BuiltIn.h"
47 #include "compiler/translator/tree_util/DriverUniform.h"
48 #include "compiler/translator/tree_util/FindFunction.h"
49 #include "compiler/translator/tree_util/FindMain.h"
50 #include "compiler/translator/tree_util/FindSymbolNode.h"
51 #include "compiler/translator/tree_util/IntermNode_util.h"
52 #include "compiler/translator/tree_util/ReplaceClipCullDistanceVariable.h"
53 #include "compiler/translator/tree_util/ReplaceVariable.h"
54 #include "compiler/translator/tree_util/RunAtTheBeginningOfShader.h"
55 #include "compiler/translator/tree_util/RunAtTheEndOfShader.h"
56 #include "compiler/translator/tree_util/SpecializationConstant.h"
57 #include "compiler/translator/util.h"
58
59 namespace sh
60 {
61
62 namespace
63 {
64
// Names given to the ANGLE-internal global variables that hold flipped copies of built-in
// coordinates. kFlippedFragCoordName is handed to FlipBuiltinVariable() by
// InsertFragCoordCorrection(); kFlippedPointCoordName is presumably used the same way for
// gl_PointCoord elsewhere in this file -- TODO confirm.
constexpr Name kFlippedPointCoordName("flippedPointCoord", SymbolType::AngleInternal);
constexpr Name kFlippedFragCoordName("flippedFragCoord", SymbolType::AngleInternal);
67
68 class DeclareStructTypesTraverser : public TIntermTraverser
69 {
70 public:
DeclareStructTypesTraverser(TOutputMSL * outputMSL)71 explicit DeclareStructTypesTraverser(TOutputMSL *outputMSL)
72 : TIntermTraverser(true, false, false), mOutputMSL(outputMSL)
73 {}
74
visitDeclaration(Visit visit,TIntermDeclaration * node)75 bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
76 {
77 ASSERT(visit == PreVisit);
78 if (!mInGlobalScope)
79 {
80 return false;
81 }
82
83 const TIntermSequence &sequence = *(node->getSequence());
84 TIntermTyped *declarator = sequence.front()->getAsTyped();
85 const TType &type = declarator->getType();
86
87 if (type.isStructSpecifier())
88 {
89 const TStructure *structure = type.getStruct();
90
91 // Embedded structs should be parsed away by now.
92 ASSERT(structure->symbolType() != SymbolType::Empty);
93 // outputMSL->writeStructType(structure);
94
95 TIntermSymbol *symbolNode = declarator->getAsSymbolNode();
96 if (symbolNode && symbolNode->variable().symbolType() == SymbolType::Empty)
97 {
98 // Remove the struct specifier declaration from the tree so it isn't parsed again.
99 TIntermSequence emptyReplacement;
100 mMultiReplacements.emplace_back(getParentNode()->getAsBlock(), node,
101 std::move(emptyReplacement));
102 }
103 }
104 // TODO: REMOVE, used to remove 'unsued' warning
105 mOutputMSL = nullptr;
106
107 return false;
108 }
109
110 private:
111 TOutputMSL *mOutputMSL;
112 };
113
114 class DeclareDefaultUniformsTraverser : public TIntermTraverser
115 {
116 public:
DeclareDefaultUniformsTraverser(TInfoSinkBase * sink,ShHashFunction64 hashFunction,NameMap * nameMap)117 DeclareDefaultUniformsTraverser(TInfoSinkBase *sink,
118 ShHashFunction64 hashFunction,
119 NameMap *nameMap)
120 : TIntermTraverser(true, true, true),
121 mSink(sink),
122 mHashFunction(hashFunction),
123 mNameMap(nameMap),
124 mInDefaultUniform(false)
125 {}
126
visitDeclaration(Visit visit,TIntermDeclaration * node)127 bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
128 {
129 const TIntermSequence &sequence = *(node->getSequence());
130
131 // TODO(jmadill): Compound declarations.
132 ASSERT(sequence.size() == 1);
133
134 TIntermTyped *variable = sequence.front()->getAsTyped();
135 const TType &type = variable->getType();
136 bool isUniform = type.getQualifier() == EvqUniform && !type.isInterfaceBlock() &&
137 !IsOpaqueType(type.getBasicType());
138
139 if (visit == PreVisit)
140 {
141 if (isUniform)
142 {
143 (*mSink) << " " << GetTypeName(type, mHashFunction, mNameMap) << " ";
144 mInDefaultUniform = true;
145 }
146 }
147 else if (visit == InVisit)
148 {
149 mInDefaultUniform = isUniform;
150 }
151 else if (visit == PostVisit)
152 {
153 if (isUniform)
154 {
155 (*mSink) << ";\n";
156
157 // Remove the uniform declaration from the tree so it isn't parsed again.
158 TIntermSequence emptyReplacement;
159 mMultiReplacements.emplace_back(getParentNode()->getAsBlock(), node,
160 std::move(emptyReplacement));
161 }
162
163 mInDefaultUniform = false;
164 }
165 return true;
166 }
167
visitSymbol(TIntermSymbol * symbol)168 void visitSymbol(TIntermSymbol *symbol) override
169 {
170 if (mInDefaultUniform)
171 {
172 const ImmutableString &name = symbol->variable().name();
173 ASSERT(!gl::IsBuiltInName(name.data()));
174 (*mSink) << HashName(&symbol->variable(), mHashFunction, mNameMap)
175 << ArrayString(symbol->getType());
176 }
177 }
178
179 private:
180 TInfoSinkBase *mSink;
181 ShHashFunction64 mHashFunction;
182 NameMap *mNameMap;
183 bool mInDefaultUniform;
184 };
185
186 // Declares a new variable to replace gl_DepthRange, its values are fed from a driver uniform.
ReplaceGLDepthRangeWithDriverUniform(TCompiler * compiler,TIntermBlock * root,const DriverUniformMetal * driverUniforms,TSymbolTable * symbolTable)187 [[nodiscard]] bool ReplaceGLDepthRangeWithDriverUniform(TCompiler *compiler,
188 TIntermBlock *root,
189 const DriverUniformMetal *driverUniforms,
190 TSymbolTable *symbolTable)
191 {
192 // Create a symbol reference to "gl_DepthRange"
193 const TVariable *depthRangeVar = static_cast<const TVariable *>(
194 symbolTable->findBuiltIn(ImmutableString("gl_DepthRange"), 0));
195
196 // ANGLEUniforms.depthRange
197 TIntermTyped *angleEmulatedDepthRangeRef = driverUniforms->getDepthRange();
198
199 // Use this variable instead of gl_DepthRange everywhere.
200 return ReplaceVariableWithTyped(compiler, root, depthRangeVar, angleEmulatedDepthRangeRef);
201 }
202
GetMainSequence(TIntermBlock * root)203 TIntermSequence *GetMainSequence(TIntermBlock *root)
204 {
205 TIntermFunctionDefinition *main = FindMain(root);
206 return main->getBody()->getSequence();
207 }
208
209 // Replaces a builtin variable with a version that is rotated and corrects the X and Y coordinates.
FlipBuiltinVariable(TCompiler * compiler,TIntermBlock * root,TIntermSequence * insertSequence,TIntermTyped * flipXY,TSymbolTable * symbolTable,const TVariable * builtin,const Name & flippedVariableName,TIntermTyped * pivot)210 [[nodiscard]] bool FlipBuiltinVariable(TCompiler *compiler,
211 TIntermBlock *root,
212 TIntermSequence *insertSequence,
213 TIntermTyped *flipXY,
214 TSymbolTable *symbolTable,
215 const TVariable *builtin,
216 const Name &flippedVariableName,
217 TIntermTyped *pivot)
218 {
219 // Create a symbol reference to 'builtin'.
220 TIntermSymbol *builtinRef = new TIntermSymbol(builtin);
221
222 // Create a swizzle to "builtin.xy"
223 TVector<int> swizzleOffsetXY = {0, 1};
224 TIntermSwizzle *builtinXY = new TIntermSwizzle(builtinRef, swizzleOffsetXY);
225
226 // Create a symbol reference to our new variable that will hold the modified builtin.
227 const TType *type =
228 StaticType::GetForVec<EbtFloat, EbpHigh>(EvqGlobal, builtin->getType().getNominalSize());
229 TVariable *replacementVar =
230 new TVariable(symbolTable, flippedVariableName.rawName(), type, SymbolType::AngleInternal);
231 DeclareGlobalVariable(root, replacementVar);
232 TIntermSymbol *flippedBuiltinRef = new TIntermSymbol(replacementVar);
233
234 // Use this new variable instead of 'builtin' everywhere.
235 if (!ReplaceVariable(compiler, root, builtin, replacementVar))
236 {
237 return false;
238 }
239
240 // Create the expression "(builtin.xy - pivot) * flipXY + pivot
241 TIntermBinary *removePivot = new TIntermBinary(EOpSub, builtinXY, pivot);
242 TIntermBinary *inverseXY = new TIntermBinary(EOpMul, removePivot, flipXY);
243 TIntermBinary *plusPivot = new TIntermBinary(EOpAdd, inverseXY, pivot->deepCopy());
244
245 // Create the corrected variable and copy the value of the original builtin.
246 TIntermSequence sequence;
247 sequence.push_back(builtinRef->deepCopy());
248 TIntermAggregate *aggregate =
249 TIntermAggregate::CreateConstructor(builtin->getType(), &sequence);
250 TIntermBinary *assignment = new TIntermBinary(EOpAssign, flippedBuiltinRef, aggregate);
251
252 // Create an assignment to the replaced variable's .xy.
253 TIntermSwizzle *correctedXY =
254 new TIntermSwizzle(flippedBuiltinRef->deepCopy(), swizzleOffsetXY);
255 TIntermBinary *assignToY = new TIntermBinary(EOpAssign, correctedXY, plusPivot);
256
257 // Add this assigment at the beginning of the main function
258 insertSequence->insert(insertSequence->begin(), assignToY);
259 insertSequence->insert(insertSequence->begin(), assignment);
260
261 return compiler->validateAST(root);
262 }
263
InsertFragCoordCorrection(TCompiler * compiler,const ShCompileOptions & compileOptions,TIntermBlock * root,TIntermSequence * insertSequence,TSymbolTable * symbolTable,const DriverUniformMetal * driverUniforms)264 [[nodiscard]] bool InsertFragCoordCorrection(TCompiler *compiler,
265 const ShCompileOptions &compileOptions,
266 TIntermBlock *root,
267 TIntermSequence *insertSequence,
268 TSymbolTable *symbolTable,
269 const DriverUniformMetal *driverUniforms)
270 {
271 TIntermTyped *flipXY = driverUniforms->getFlipXY(symbolTable, DriverUniformFlip::Fragment);
272 TIntermTyped *pivot = driverUniforms->getHalfRenderArea();
273
274 const TVariable *fragCoord = static_cast<const TVariable *>(
275 symbolTable->findBuiltIn(ImmutableString("gl_FragCoord"), compiler->getShaderVersion()));
276 return FlipBuiltinVariable(compiler, root, insertSequence, flipXY, symbolTable, fragCoord,
277 kFlippedFragCoordName, pivot);
278 }
279
DeclareRightBeforeMain(TIntermBlock & root,const TVariable & var)280 void DeclareRightBeforeMain(TIntermBlock &root, const TVariable &var)
281 {
282 root.insertChildNodes(FindMainIndex(&root), {new TIntermDeclaration{&var}});
283 }
284
AddFragColorDeclaration(TIntermBlock & root,TSymbolTable & symbolTable,const TVariable & var)285 void AddFragColorDeclaration(TIntermBlock &root, TSymbolTable &symbolTable, const TVariable &var)
286 {
287 root.insertChildNodes(FindMainIndex(&root), TIntermSequence{new TIntermDeclaration{&var}});
288 }
289
AddBuiltInDeclaration(TIntermBlock & root,TSymbolTable & symbolTable,const TVariable & builtIn)290 void AddBuiltInDeclaration(TIntermBlock &root, TSymbolTable &symbolTable, const TVariable &builtIn)
291 {
292 // Check if the variable has been already declared.
293 const TIntermSymbol *builtInSymbol = new TIntermSymbol(&builtIn);
294 const TIntermSymbol *foundSymbol = FindSymbolNode(&root, builtIn.name());
295 if (foundSymbol && foundSymbol->uniqueId() != builtInSymbol->uniqueId())
296 {
297 return;
298 }
299 root.insertChildNodes(FindMainIndex(&root), TIntermSequence{new TIntermDeclaration{&builtIn}});
300 }
301
AddFragDepthEXTDeclaration(TCompiler & compiler,TIntermBlock & root,TSymbolTable & symbolTable)302 void AddFragDepthEXTDeclaration(TCompiler &compiler, TIntermBlock &root, TSymbolTable &symbolTable)
303 {
304 const TIntermSymbol *glFragDepthExt = FindSymbolNode(&root, ImmutableString("gl_FragDepthEXT"));
305 ASSERT(glFragDepthExt);
306 // Replace gl_FragData with our globally defined fragdata.
307 if (!ReplaceVariable(&compiler, &root, &(glFragDepthExt->variable()),
308 BuiltInVariable::gl_FragDepth()))
309 {
310 return;
311 }
312 AddBuiltInDeclaration(root, symbolTable, *BuiltInVariable::gl_FragDepth());
313 }
314
AddNumSamplesDeclaration(TCompiler & compiler,TIntermBlock & root,TSymbolTable & symbolTable)315 [[nodiscard]] bool AddNumSamplesDeclaration(TCompiler &compiler,
316 TIntermBlock &root,
317 TSymbolTable &symbolTable)
318 {
319 const TVariable *glNumSamples = BuiltInVariable::gl_NumSamples();
320 DeclareRightBeforeMain(root, *glNumSamples);
321
322 // gl_NumSamples = metal::get_num_samples();
323 TIntermBinary *assignment = new TIntermBinary(
324 TOperator::EOpAssign, new TIntermSymbol(glNumSamples),
325 CreateBuiltInFunctionCallNode("numSamples", {}, symbolTable, kESSLInternalBackendBuiltIns));
326 return RunAtTheBeginningOfShader(&compiler, &root, assignment);
327 }
328
AddSamplePositionDeclaration(TCompiler & compiler,TIntermBlock & root,TSymbolTable & symbolTable,const DriverUniformMetal * driverUniforms)329 [[nodiscard]] bool AddSamplePositionDeclaration(TCompiler &compiler,
330 TIntermBlock &root,
331 TSymbolTable &symbolTable,
332 const DriverUniformMetal *driverUniforms)
333 {
334 const TVariable *glSamplePosition = BuiltInVariable::gl_SamplePosition();
335 DeclareRightBeforeMain(root, *glSamplePosition);
336
337 // When rendering to a default FBO, gl_SamplePosition should
338 // be Y-flipped to match the actual sample location
339 // gl_SamplePosition = metal::get_sample_position(uint(gl_SampleID));
340 // gl_SamplePosition -= 0.5;
341 // gl_SamplePosition *= flipXY;
342 // gl_SamplePosition += 0.5;
343 TIntermBlock *block = new TIntermBlock;
344 block->appendStatement(new TIntermBinary(
345 TOperator::EOpAssign, new TIntermSymbol(glSamplePosition),
346 CreateBuiltInFunctionCallNode("samplePosition",
347 {TIntermAggregate::CreateConstructor(
348 *StaticType::GetBasic<EbtUInt, EbpHigh>(),
349 {new TIntermSymbol(BuiltInVariable::gl_SampleID())})},
350 symbolTable, kESSLInternalBackendBuiltIns)));
351 block->appendStatement(new TIntermBinary(TOperator::EOpSubAssign,
352 new TIntermSymbol(glSamplePosition),
353 CreateFloatNode(0.5f, EbpHigh)));
354 block->appendStatement(
355 new TIntermBinary(EOpMulAssign, new TIntermSymbol(glSamplePosition),
356 driverUniforms->getFlipXY(&symbolTable, DriverUniformFlip::Fragment)));
357 block->appendStatement(new TIntermBinary(TOperator::EOpAddAssign,
358 new TIntermSymbol(glSamplePosition),
359 CreateFloatNode(0.5f, EbpHigh)));
360 return RunAtTheBeginningOfShader(&compiler, &root, block);
361 }
362
AddSampleMaskInDeclaration(TCompiler & compiler,TIntermBlock & root,TSymbolTable & symbolTable,const DriverUniformMetal * driverUniforms,bool perSampleShading)363 [[nodiscard]] bool AddSampleMaskInDeclaration(TCompiler &compiler,
364 TIntermBlock &root,
365 TSymbolTable &symbolTable,
366 const DriverUniformMetal *driverUniforms,
367 bool perSampleShading)
368 {
369 // in highp int gl_SampleMaskIn[1]
370 const TVariable *glSampleMaskIn = static_cast<const TVariable *>(
371 symbolTable.findBuiltIn(ImmutableString("gl_SampleMaskIn"), compiler.getShaderVersion()));
372 DeclareRightBeforeMain(root, *glSampleMaskIn);
373
374 // Reference to gl_SampleMaskIn[0]
375 TIntermBinary *glSampleMaskIn0 =
376 new TIntermBinary(EOpIndexDirect, new TIntermSymbol(glSampleMaskIn), CreateIndexNode(0));
377
378 // When per-sample shading is active due to the use of a fragment input qualified
379 // by sample or due to the use of the gl_SampleID or gl_SamplePosition variables,
380 // only the bit for the current sample is set in gl_SampleMaskIn.
381 TIntermBlock *block = new TIntermBlock;
382 if (perSampleShading)
383 {
384 // gl_SampleMaskIn[0] = 1 << gl_SampleID;
385 block->appendStatement(new TIntermBinary(
386 EOpAssign, glSampleMaskIn0,
387 new TIntermBinary(EOpBitShiftLeft, CreateUIntNode(1),
388 new TIntermSymbol(BuiltInVariable::gl_SampleID()))));
389 }
390 else
391 {
392 // uint32_t ANGLE_metal_SampleMaskIn [[sample_mask]]
393 TVariable *angleSampleMaskIn = new TVariable(
394 &symbolTable, ImmutableString("metal_SampleMaskIn"),
395 new TType(EbtUInt, EbpHigh, EvqSampleMaskIn, 1), SymbolType::AngleInternal);
396 DeclareRightBeforeMain(root, *angleSampleMaskIn);
397
398 // gl_SampleMaskIn[0] = ANGLE_metal_SampleMaskIn;
399 block->appendStatement(
400 new TIntermBinary(EOpAssign, glSampleMaskIn0, new TIntermSymbol(angleSampleMaskIn)));
401 }
402
403 // Bits in the sample mask corresponding to covered samples
404 // that will be unset due to SAMPLE_COVERAGE or SAMPLE_MASK
405 // will not be set (section 4.1.3).
406 // if (ANGLEMultisampledRendering)
407 // {
408 // gl_SampleMaskIn[0] &= ANGLE_angleUniforms.coverageMask;
409 // }
410 TIntermBlock *coverageBlock = new TIntermBlock;
411 coverageBlock->appendStatement(new TIntermBinary(
412 EOpBitwiseAndAssign, glSampleMaskIn0->deepCopy(), driverUniforms->getCoverageMaskField()));
413
414 TVariable *sampleMaskEnabledVar = new TVariable(
415 &symbolTable, sh::ImmutableString(mtl::kMultisampledRenderingConstName),
416 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(), SymbolType::AngleInternal);
417 block->appendStatement(
418 new TIntermIfElse(new TIntermSymbol(sampleMaskEnabledVar), coverageBlock, nullptr));
419
420 return RunAtTheBeginningOfShader(&compiler, &root, block);
421 }
422
AddSampleMaskDeclaration(TCompiler & compiler,TIntermBlock & root,TSymbolTable & symbolTable,const DriverUniformMetal * driverUniforms,bool includeEmulateAlphaToCoverage,bool usesSampleMask)423 [[nodiscard]] bool AddSampleMaskDeclaration(TCompiler &compiler,
424 TIntermBlock &root,
425 TSymbolTable &symbolTable,
426 const DriverUniformMetal *driverUniforms,
427 bool includeEmulateAlphaToCoverage,
428 bool usesSampleMask)
429 {
430 // uint32_t ANGLE_metal_SampleMask [[sample_mask]]
431 TVariable *angleSampleMask =
432 new TVariable(&symbolTable, ImmutableString("metal_SampleMask"),
433 new TType(EbtUInt, EbpHigh, EvqSampleMask, 1), SymbolType::AngleInternal);
434 DeclareRightBeforeMain(root, *angleSampleMask);
435
436 // Write all-enabled sample mask even for single-sampled rendering
437 // when the shader uses derivatives to workaround a driver bug.
438 if (compiler.usesDerivatives())
439 {
440 TIntermBlock *helperAssignBlock = new TIntermBlock;
441 helperAssignBlock->appendStatement(new TIntermBinary(
442 EOpAssign, new TIntermSymbol(angleSampleMask), CreateUIntNode(0xFFFFFFFFu)));
443
444 TVariable *writeHelperSampleMaskVar =
445 new TVariable(&symbolTable, sh::ImmutableString(mtl::kWriteHelperSampleMaskConstName),
446 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(),
447 SymbolType::AngleInternal);
448
449 if (!RunAtTheBeginningOfShader(
450 &compiler, &root,
451 new TIntermIfElse(new TIntermSymbol(writeHelperSampleMaskVar), helperAssignBlock,
452 nullptr)))
453 {
454 return false;
455 }
456 }
457
458 // ANGLE_metal_SampleMask = ANGLE_angleUniforms.coverageMask;
459 TIntermBlock *block = new TIntermBlock;
460 block->appendStatement(new TIntermBinary(EOpAssign, new TIntermSymbol(angleSampleMask),
461 driverUniforms->getCoverageMaskField()));
462 if (usesSampleMask)
463 {
464 // out highp int gl_SampleMask[1];
465 const TVariable *glSampleMask = static_cast<const TVariable *>(
466 symbolTable.findBuiltIn(ImmutableString("gl_SampleMask"), compiler.getShaderVersion()));
467 DeclareRightBeforeMain(root, *glSampleMask);
468
469 // ANGLE_metal_SampleMask &= gl_SampleMask[0];
470 TIntermBinary *glSampleMask0 =
471 new TIntermBinary(EOpIndexDirect, new TIntermSymbol(glSampleMask), CreateIndexNode(0));
472 block->appendStatement(new TIntermBinary(
473 EOpBitwiseAndAssign, new TIntermSymbol(angleSampleMask), glSampleMask0));
474 }
475
476 if (includeEmulateAlphaToCoverage)
477 {
478 // Some Metal drivers ignore alpha-to-coverage state when a fragment
479 // shader writes to [[sample_mask]]. Moreover, Metal pipeline state
480 // does not support setting a global coverage mask, which would be used
481 // for emulating GL_SAMPLE_COVERAGE, so [[sample_mask]] is used instead.
482 // To support alpha-to-coverage regardless of the [[sample_mask]] usage,
483 // the former is always emulated on such drivers.
484 TIntermBlock *alphaBlock = new TIntermBlock;
485
486 // To reduce image artifacts due to regular coverage sample locations,
487 // alpha value thresholds that toggle individual samples are slightly
488 // different within 2x2 pixel blocks. Consider MSAAx4, for example.
489 // Instead of always enabling samples on evenly distributed alpha
490 // values like {51, 102, 153, 204} these thresholds may vary as follows
491 //
492 // Sample 0 Sample 1 Sample 2 Sample 3
493 // ----- ----- ----- ----- ----- ----- ----- -----
494 // | 7.5| 39.5| | 71.5|103.5| |135.5|167.5| |199.5|231.5|
495 // |----- -----| |----- -----| |----- -----| |----- -----|
496 // | 55.5| 23.5| |119.5| 87.5| |183.5|151.5| |247.5|215.5|
497 // ----- ----- ----- ----- ----- ----- ----- -----
498 // These threshold values may be expressed as
499 // 7.5 + P * 16 + 64 * sampleID
500 // where P is
501 // ((x << 1) - (y & 1)) & 3
502 // and constant values depend on the number of samples used.
503 TVariable *p = CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtInt, EbpHigh>());
504 TVariable *y = CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtInt, EbpHigh>());
505 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
506 p, new TIntermSwizzle(new TIntermSymbol(BuiltInVariable::gl_FragCoord()), {0})));
507 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
508 y, new TIntermSwizzle(new TIntermSymbol(BuiltInVariable::gl_FragCoord()), {1})));
509 alphaBlock->appendStatement(
510 new TIntermBinary(EOpBitShiftLeftAssign, new TIntermSymbol(p), CreateIndexNode(1)));
511 alphaBlock->appendStatement(
512 new TIntermBinary(EOpBitwiseAndAssign, new TIntermSymbol(y), CreateIndexNode(1)));
513 alphaBlock->appendStatement(
514 new TIntermBinary(EOpSubAssign, new TIntermSymbol(p), new TIntermSymbol(y)));
515 alphaBlock->appendStatement(
516 new TIntermBinary(EOpBitwiseAndAssign, new TIntermSymbol(p), CreateIndexNode(3)));
517
518 // This internal variable, defined in-text in the function constants section,
519 // will point to the alpha channel of the color zero output. Due to potential
520 // EXT_blend_func_extended usage, the exact variable may be unknown until the
521 // program is linked.
522 TVariable *alpha0 =
523 new TVariable(&symbolTable, sh::ImmutableString("ALPHA0"),
524 StaticType::Get<EbtFloat, EbpUndefined, EvqSpecConst, 1, 1>(),
525 SymbolType::AngleInternal);
526
527 // Use metal::saturate to clamp the alpha value to [0.0, 1.0] and scale it
528 // to [0.0, 510.0] since further operations expect an integer alpha value.
529 TVariable *alphaScaled =
530 CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtFloat, EbpHigh>());
531 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
532 alphaScaled, CreateBuiltInFunctionCallNode("saturate", {new TIntermSymbol(alpha0)},
533 symbolTable, kESSLInternalBackendBuiltIns)));
534 alphaBlock->appendStatement(new TIntermBinary(EOpMulAssign, new TIntermSymbol(alphaScaled),
535 CreateFloatNode(510.0, EbpUndefined)));
536 // int alphaMask = int(alphaScaled);
537 TVariable *alphaMask =
538 CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtInt, EbpHigh>());
539 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
540 alphaMask, TIntermAggregate::CreateConstructor(*StaticType::GetBasic<EbtInt, EbpHigh>(),
541 {new TIntermSymbol(alphaScaled)})));
542
543 // Next operations depend on the number of samples in the curent render target.
544 TIntermBlock *switchBlock = new TIntermBlock();
545
546 auto computeNumberOfSamples = [&](int step, int bias, int scale) {
547 switchBlock->appendStatement(new TIntermBinary(
548 EOpBitShiftLeftAssign, new TIntermSymbol(p), CreateIndexNode(step)));
549 switchBlock->appendStatement(new TIntermBinary(
550 EOpAddAssign, new TIntermSymbol(alphaMask), CreateIndexNode(bias)));
551 switchBlock->appendStatement(new TIntermBinary(
552 EOpSubAssign, new TIntermSymbol(alphaMask), new TIntermSymbol(p)));
553 switchBlock->appendStatement(new TIntermBinary(
554 EOpBitShiftRightAssign, new TIntermSymbol(alphaMask), CreateIndexNode(scale)));
555 };
556
557 // MSAAx2
558 switchBlock->appendStatement(new TIntermCase(CreateIndexNode(2)));
559
560 // Canonical threshold values are
561 // 15.5 + P * 32 + 128 * sampleID
562 // With alpha values scaled to [0, 510], the number of covered samples is
563 // (alphaScaled + 256 - (31 + P * 64)) / 256
564 // which could be simplified to
565 // (alphaScaled + 225 - (P << 6)) >> 8
566 computeNumberOfSamples(6, 225, 8);
567
568 // In a case of only two samples, the coverage mask is
569 // mask = (num_covered_samples * 3) >> 1
570 switchBlock->appendStatement(
571 new TIntermBinary(EOpMulAssign, new TIntermSymbol(alphaMask), CreateIndexNode(3)));
572 switchBlock->appendStatement(new TIntermBinary(
573 EOpBitShiftRightAssign, new TIntermSymbol(alphaMask), CreateIndexNode(1)));
574
575 switchBlock->appendStatement(new TIntermBranch(EOpBreak, nullptr));
576
577 // MSAAx4
578 switchBlock->appendStatement(new TIntermCase(CreateIndexNode(4)));
579
580 // Canonical threshold values are
581 // 7.5 + P * 16 + 64 * sampleID
582 // With alpha values scaled to [0, 510], the number of covered samples is
583 // (alphaScaled + 128 - (15 + P * 32)) / 128
584 // which could be simplified to
585 // (alphaScaled + 113 - (P << 5)) >> 7
586 computeNumberOfSamples(5, 113, 7);
587
588 // When two out of four samples should be covered, prioritize
589 // those that are located in the opposite corners of a pixel.
590 // 0: 0000, 1: 0001, 2: 1001, 3: 1011, 4: 1111
591 // mask = (0xFB910 >> (num_covered_samples * 4)) & 0xF
592 // The final AND may be omitted because the rasterizer output
593 // is limited to four samples.
594 switchBlock->appendStatement(new TIntermBinary(
595 EOpBitShiftLeftAssign, new TIntermSymbol(alphaMask), CreateIndexNode(2)));
596 switchBlock->appendStatement(
597 new TIntermBinary(EOpAssign, new TIntermSymbol(alphaMask),
598 new TIntermBinary(EOpBitShiftRight, CreateIndexNode(0xFB910),
599 new TIntermSymbol(alphaMask))));
600
601 switchBlock->appendStatement(new TIntermBranch(EOpBreak, nullptr));
602
603 // MSAAx8
604 switchBlock->appendStatement(new TIntermCase(CreateIndexNode(8)));
605
606 // Canonical threshold values are
607 // 3.5 + P * 8 + 32 * sampleID
608 // With alpha values scaled to [0, 510], the number of covered samples is
609 // (alphaScaled + 64 - (7 + P * 16)) / 64
610 // which could be simplified to
611 // (alphaScaled + 57 - (P << 4)) >> 6
612 computeNumberOfSamples(4, 57, 6);
613
614 // When eight samples are used, they could be enabled one by one
615 // mask = ~(0xFFFFFFFF << num_covered_samples)
616 switchBlock->appendStatement(
617 new TIntermBinary(EOpAssign, new TIntermSymbol(alphaMask),
618 new TIntermBinary(EOpBitShiftLeft, CreateUIntNode(0xFFFFFFFFu),
619 new TIntermSymbol(alphaMask))));
620 switchBlock->appendStatement(new TIntermBinary(
621 EOpAssign, new TIntermSymbol(alphaMask),
622 new TIntermUnary(EOpBitwiseNot, new TIntermSymbol(alphaMask), nullptr)));
623
624 switchBlock->appendStatement(new TIntermBranch(EOpBreak, nullptr));
625
626 alphaBlock->getSequence()->push_back(
627 new TIntermSwitch(CreateBuiltInFunctionCallNode("numSamples", {}, symbolTable,
628 kESSLInternalBackendBuiltIns),
629 switchBlock));
630
631 alphaBlock->appendStatement(new TIntermBinary(
632 EOpBitwiseAndAssign, new TIntermSymbol(angleSampleMask), new TIntermSymbol(alphaMask)));
633
634 TIntermBlock *emulateAlphaToCoverageEnabledBlock = new TIntermBlock;
635 emulateAlphaToCoverageEnabledBlock->appendStatement(
636 new TIntermIfElse(driverUniforms->getAlphaToCoverage(), alphaBlock, nullptr));
637
638 TVariable *emulateAlphaToCoverageVar =
639 new TVariable(&symbolTable, sh::ImmutableString(mtl::kEmulateAlphaToCoverageConstName),
640 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(),
641 SymbolType::AngleInternal);
642 TIntermIfElse *useAlphaToCoverage =
643 new TIntermIfElse(new TIntermSymbol(emulateAlphaToCoverageVar),
644 emulateAlphaToCoverageEnabledBlock, nullptr);
645
646 block->appendStatement(useAlphaToCoverage);
647 }
648
649 // Sample mask assignment is guarded by ANGLEMultisampledRendering specialization constant
650 TVariable *multisampledRenderingVar = new TVariable(
651 &symbolTable, sh::ImmutableString(mtl::kMultisampledRenderingConstName),
652 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(), SymbolType::AngleInternal);
653 return RunAtTheEndOfShader(
654 &compiler, &root,
655 new TIntermIfElse(new TIntermSymbol(multisampledRenderingVar), block, nullptr),
656 &symbolTable);
657 }
658
AddFragDataDeclaration(TCompiler & compiler,TIntermBlock & root,bool usesSecondary,bool secondary)659 [[nodiscard]] bool AddFragDataDeclaration(TCompiler &compiler,
660 TIntermBlock &root,
661 bool usesSecondary,
662 bool secondary)
663 {
664 TSymbolTable &symbolTable = compiler.getSymbolTable();
665 const int maxDrawBuffers = usesSecondary ? compiler.getResources().MaxDualSourceDrawBuffers
666 : compiler.getResources().MaxDrawBuffers;
667 TType *gl_FragDataType =
668 new TType(EbtFloat, EbpMedium, secondary ? EvqSecondaryFragDataEXT : EvqFragData, 4, 1);
669 std::vector<const TVariable *> glFragDataSlots;
670 TIntermSequence declareGLFragdataSequence;
671
672 // Create gl_FragData_i or gl_SecondaryFragDataEXT_i
673 const char *fragData = "gl_FragData";
674 const char *secondaryFragDataEXT = "gl_SecondaryFragDataEXT";
675 const char *name = secondary ? secondaryFragDataEXT : fragData;
676 for (int i = 0; i < maxDrawBuffers; i++)
677 {
678 ImmutableStringBuilder builder(strlen(name) + 3);
679 builder << name << "_";
680 builder.appendDecimal(i);
681 const TVariable *glFragData =
682 new TVariable(&symbolTable, builder, gl_FragDataType, SymbolType::AngleInternal,
683 TExtension::UNDEFINED);
684 glFragDataSlots.push_back(glFragData);
685 declareGLFragdataSequence.push_back(new TIntermDeclaration{glFragData});
686 }
687 root.insertChildNodes(FindMainIndex(&root), declareGLFragdataSequence);
688
689 // Create an internal gl_FragData array type, compatible with indexing syntax.
690 TType *gl_FragDataTypeArray = new TType(EbtFloat, EbpMedium, EvqGlobal, 4, 1);
691 gl_FragDataTypeArray->makeArray(maxDrawBuffers);
692 const TVariable *glFragDataGlobal = new TVariable(&symbolTable, ImmutableString(name),
693 gl_FragDataTypeArray, SymbolType::BuiltIn);
694
695 DeclareGlobalVariable(&root, glFragDataGlobal);
696 const TIntermSymbol *originalGLFragData = FindSymbolNode(&root, ImmutableString(name));
697 ASSERT(originalGLFragData);
698
699 // Replace gl_FragData[] or gl_SecondaryFragDataEXT[] with our globally defined variable
700 if (!ReplaceVariable(&compiler, &root, &(originalGLFragData->variable()), glFragDataGlobal))
701 {
702 return false;
703 }
704
705 // Assign each array attribute to an output
706 TIntermBlock *insertSequence = new TIntermBlock();
707 for (int i = 0; i < maxDrawBuffers; i++)
708 {
709 TIntermTyped *glFragDataSlot = new TIntermSymbol(glFragDataSlots[i]);
710 TIntermTyped *glFragDataGlobalSymbol = new TIntermSymbol(glFragDataGlobal);
711 auto &access = AccessIndex(*glFragDataGlobalSymbol, i);
712 TIntermBinary *assignment =
713 new TIntermBinary(TOperator::EOpAssign, glFragDataSlot, &access);
714 insertSequence->appendStatement(assignment);
715 }
716 return RunAtTheEndOfShader(&compiler, &root, insertSequence, &symbolTable);
717 }
718
AppendVertexShaderTransformFeedbackOutputToMain(TCompiler & compiler,SymbolEnv & mSymbolEnv,TIntermBlock & root)719 [[nodiscard]] bool AppendVertexShaderTransformFeedbackOutputToMain(TCompiler &compiler,
720 SymbolEnv &mSymbolEnv,
721 TIntermBlock &root)
722 {
723 TSymbolTable &symbolTable = compiler.getSymbolTable();
724
725 // Append the assignment as a statement at the end of the shader.
726 return RunAtTheEndOfShader(&compiler, &root,
727 &(mSymbolEnv.callFunctionOverload(Name("@@XFB-OUT@@"), *new TType(),
728 *new TIntermSequence())),
729 &symbolTable);
730 }
731
// Unlike Vulkan, which has an extension for automatic viewport flipping, Metal requires
// gl_Position.y to be flipped manually.
// This operation flips gl_Position.y using the expression:
// gl_Position.y = gl_Position.y * negViewportScaleY
AppendVertexShaderPositionYCorrectionToMain(TCompiler * compiler,TIntermBlock * root,TSymbolTable * symbolTable,TIntermTyped * negFlipY)736 [[nodiscard]] bool AppendVertexShaderPositionYCorrectionToMain(TCompiler *compiler,
737 TIntermBlock *root,
738 TSymbolTable *symbolTable,
739 TIntermTyped *negFlipY)
740 {
741 // Create a symbol reference to "gl_Position"
742 const TVariable *position = BuiltInVariable::gl_Position();
743 TIntermSymbol *positionRef = new TIntermSymbol(position);
744
745 // Create a swizzle to "gl_Position.y"
746 TVector<int> swizzleOffsetY;
747 swizzleOffsetY.push_back(1);
748 TIntermSwizzle *positionY = new TIntermSwizzle(positionRef, swizzleOffsetY);
749
750 // Create the expression "gl_Position.y * negFlipY"
751 TIntermBinary *inverseY = new TIntermBinary(EOpMul, positionY->deepCopy(), negFlipY);
752
753 // Create the assignment "gl_Position.y = gl_Position.y * negViewportScaleY
754 TIntermTyped *positionYLHS = positionY->deepCopy();
755 TIntermBinary *assignment = new TIntermBinary(TOperator::EOpAssign, positionYLHS, inverseY);
756
757 // Append the assignment as a statement at the end of the shader.
758 return RunAtTheEndOfShader(compiler, root, assignment, symbolTable);
759 }
760
EmulateClipDistanceVaryings(TCompiler * compiler,TIntermBlock * root,TSymbolTable * symbolTable,const GLenum shaderType)761 [[nodiscard]] bool EmulateClipDistanceVaryings(TCompiler *compiler,
762 TIntermBlock *root,
763 TSymbolTable *symbolTable,
764 const GLenum shaderType)
765 {
766 ASSERT(shaderType == GL_VERTEX_SHADER || shaderType == GL_FRAGMENT_SHADER);
767
768 const TIntermSymbol *symbolNode = FindSymbolNode(root, ImmutableString("gl_ClipDistance"));
769 ASSERT(symbolNode != nullptr);
770 const TVariable *clipDistanceVar = &symbolNode->variable();
771
772 const bool fragment = shaderType == GL_FRAGMENT_SHADER;
773 if (fragment)
774 {
775 TType *globalType = new TType(EbtFloat, EbpHigh, EvqGlobal, 1, 1);
776 globalType->toArrayBaseType();
777 globalType->makeArray(compiler->getClipDistanceArraySize());
778
779 const TVariable *globalVar = new TVariable(symbolTable, ImmutableString("ClipDistance"),
780 globalType, SymbolType::AngleInternal);
781
782 if (!ReplaceVariable(compiler, root, clipDistanceVar, globalVar))
783 {
784 return false;
785 }
786 clipDistanceVar = globalVar;
787 }
788
789 TIntermBlock *assignBlock = new TIntermBlock();
790 size_t index = FindMainIndex(root);
791 TIntermSymbol *arraySym = new TIntermSymbol(clipDistanceVar);
792 TType *type = new TType(EbtFloat, EbpHigh, fragment ? EvqFragmentIn : EvqVertexOut, 1, 1);
793 for (uint8_t i = 0; i < compiler->getClipDistanceArraySize(); i++)
794 {
795 std::stringstream name;
796 name << "ClipDistance_" << static_cast<int>(i);
797 TIntermSymbol *varyingSym = new TIntermSymbol(new TVariable(
798 symbolTable, ImmutableString(name.str()), type, SymbolType::AngleInternal));
799
800 TIntermDeclaration *varyingDecl = new TIntermDeclaration();
801 varyingDecl->appendDeclarator(varyingSym->deepCopy());
802 root->insertStatement(index++, varyingDecl);
803
804 TIntermTyped *arrayAccess = new TIntermBinary(EOpIndexDirect, arraySym, CreateIndexNode(i));
805 assignBlock->appendStatement(new TIntermBinary(
806 EOpAssign, fragment ? arrayAccess : varyingSym, fragment ? varyingSym : arrayAccess));
807 }
808
809 return fragment ? RunAtTheBeginningOfShader(compiler, root, assignBlock)
810 : RunAtTheEndOfShader(compiler, root, assignBlock, symbolTable);
811 }
812 } // namespace
813
814 namespace mtl
815 {
getTranslatorMetalReflection(const TCompiler * compiler)816 TranslatorMetalReflection *getTranslatorMetalReflection(const TCompiler *compiler)
817 {
818 return ((TranslatorMSL *)compiler)->getTranslatorMetalReflection();
819 }
820 } // namespace mtl
TranslatorMSL(sh::GLenum type,ShShaderSpec spec,ShShaderOutput output)821 TranslatorMSL::TranslatorMSL(sh::GLenum type, ShShaderSpec spec, ShShaderOutput output)
822 : TCompiler(type, spec, output)
823 {}
824
insertRasterizationDiscardLogic(TIntermBlock & root)825 [[nodiscard]] bool TranslatorMSL::insertRasterizationDiscardLogic(TIntermBlock &root)
826 {
827 // This transformation leaves the tree in an inconsistent state by using a variable that's
828 // defined in text, outside of the knowledge of the AST.
829 mValidateASTOptions.validateVariableReferences = false;
830
831 TSymbolTable *symbolTable = &getSymbolTable();
832
833 TType *boolType = new TType(EbtBool);
834 boolType->setQualifier(EvqConst);
835 TVariable *discardEnabledVar =
836 new TVariable(symbolTable, sh::ImmutableString(sh::mtl::kRasterizerDiscardEnabledConstName),
837 boolType, SymbolType::AngleInternal);
838
839 const TVariable *position = BuiltInVariable::gl_Position();
840 TIntermSymbol *positionRef = new TIntermSymbol(position);
841
842 // Create vec4(-3, -3, -3, 1):
843 auto vec4Type = new TType(EbtFloat, 4);
844 TIntermSequence vec4Args = {
845 CreateFloatNode(-3.0f, EbpMedium),
846 CreateFloatNode(-3.0f, EbpMedium),
847 CreateFloatNode(-3.0f, EbpMedium),
848 CreateFloatNode(1.0f, EbpMedium),
849 };
850 TIntermAggregate *constVarConstructor =
851 TIntermAggregate::CreateConstructor(*vec4Type, &vec4Args);
852
853 // Create the assignment "gl_Position = vec4(-3, -3, -3, 1)"
854 TIntermBinary *assignment =
855 new TIntermBinary(TOperator::EOpAssign, positionRef->deepCopy(), constVarConstructor);
856
857 TIntermBlock *discardBlock = new TIntermBlock;
858 discardBlock->appendStatement(assignment);
859
860 TIntermSymbol *discardEnabled = new TIntermSymbol(discardEnabledVar);
861 TIntermIfElse *ifCall = new TIntermIfElse(discardEnabled, discardBlock, nullptr);
862
863 return RunAtTheEndOfShader(this, &root, ifCall, symbolTable);
864 }
865
// Metal needs to invert the depth if depthRange is in reverse order, i.e. depth near > depth far.
// This is achieved by multiplying the depth value by the scale value stored in
// driver uniform's depthRange.reserved
transformDepthBeforeCorrection(TIntermBlock * root,const DriverUniformMetal * driverUniforms)869 bool TranslatorMSL::transformDepthBeforeCorrection(TIntermBlock *root,
870 const DriverUniformMetal *driverUniforms)
871 {
872 // Create a symbol reference to "gl_Position"
873 const TVariable *position = BuiltInVariable::gl_Position();
874 TIntermSymbol *positionRef = new TIntermSymbol(position);
875
876 // Create a swizzle to "gl_Position.z"
877 TVector<int> swizzleOffsetZ = {2};
878 TIntermSwizzle *positionZ = new TIntermSwizzle(positionRef, swizzleOffsetZ);
879
880 // Create a ref to "zscale"
881 TIntermTyped *viewportZScale = driverUniforms->getViewportZScale();
882
883 // Create the expression "gl_Position.z * zscale".
884 TIntermBinary *zScale = new TIntermBinary(EOpMul, positionZ->deepCopy(), viewportZScale);
885
886 // Create the assignment "gl_Position.z = gl_Position.z * zscale"
887 TIntermTyped *positionZLHS = positionZ->deepCopy();
888 TIntermBinary *assignment = new TIntermBinary(TOperator::EOpAssign, positionZLHS, zScale);
889
890 // Append the assignment as a statement at the end of the shader.
891 return RunAtTheEndOfShader(this, root, assignment, &getSymbolTable());
892 }
893
// This operation performs the viewport depth translation needed by Metal. GL uses a
// clip space z range of -1 to +1, whereas Metal uses 0 to 1. The translation becomes
// this expression:
//
//     z_metal = 0.5 * (w_gl + z_gl)
//
// where z_metal is the depth output of a Metal vertex shader and z_gl is the same for GL.
// This operation is skipped when GL_CLIP_DEPTH_MODE_EXT is set to GL_ZERO_TO_ONE_EXT.
appendVertexShaderDepthCorrectionToMain(TIntermBlock * root,const DriverUniformMetal * driverUniforms)902 bool TranslatorMSL::appendVertexShaderDepthCorrectionToMain(
903 TIntermBlock *root,
904 const DriverUniformMetal *driverUniforms)
905 {
906 const TVariable *position = BuiltInVariable::gl_Position();
907 TIntermSymbol *positionRef = new TIntermSymbol(position);
908
909 TVector<int> swizzleOffsetZ = {2};
910 TIntermSwizzle *positionZ = new TIntermSwizzle(positionRef, swizzleOffsetZ);
911
912 TIntermConstantUnion *oneHalf = CreateFloatNode(0.5f, EbpMedium);
913
914 TVector<int> swizzleOffsetW = {3};
915 TIntermSwizzle *positionW = new TIntermSwizzle(positionRef->deepCopy(), swizzleOffsetW);
916
917 // Create the expression "(gl_Position.z + gl_Position.w) * 0.5".
918 TIntermBinary *zPlusW = new TIntermBinary(EOpAdd, positionZ->deepCopy(), positionW->deepCopy());
919 TIntermBinary *halfZPlusW = new TIntermBinary(EOpMul, zPlusW, oneHalf->deepCopy());
920
921 // Create the assignment "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5"
922 TIntermTyped *positionZLHS = positionZ->deepCopy();
923 TIntermBinary *assignment = new TIntermBinary(TOperator::EOpAssign, positionZLHS, halfZPlusW);
924
925 // Apply depth correction if needed
926 TIntermBlock *block = new TIntermBlock;
927 block->appendStatement(assignment);
928 TIntermIfElse *ifCall = new TIntermIfElse(driverUniforms->getTransformDepth(), block, nullptr);
929
930 // Append the assignment as a statement at the end of the shader.
931 return RunAtTheEndOfShader(this, root, ifCall, &getSymbolTable());
932 }
933
metalShaderTypeFromGLSL(sh::GLenum shaderType)934 static inline MetalShaderType metalShaderTypeFromGLSL(sh::GLenum shaderType)
935 {
936 switch (shaderType)
937 {
938 case GL_VERTEX_SHADER:
939 return MetalShaderType::Vertex;
940 case GL_FRAGMENT_SHADER:
941 return MetalShaderType::Fragment;
942 case GL_COMPUTE_SHADER:
943 ASSERT(0 && "compute shaders not currently supported");
944 return MetalShaderType::Compute;
945 default:
946 ASSERT(0 && "Invalid shader type.");
947 return MetalShaderType::None;
948 }
949 }
950
translateImpl(TInfoSinkBase & sink,TIntermBlock * root,const ShCompileOptions & compileOptions,PerformanceDiagnostics *,SpecConst * specConst,DriverUniformMetal * driverUniforms)951 bool TranslatorMSL::translateImpl(TInfoSinkBase &sink,
952 TIntermBlock *root,
953 const ShCompileOptions &compileOptions,
954 PerformanceDiagnostics * /*perfDiagnostics*/,
955 SpecConst *specConst,
956 DriverUniformMetal *driverUniforms)
957 {
958 TSymbolTable &symbolTable = getSymbolTable();
959 IdGen idGen;
960 ProgramPreludeConfig ppc(metalShaderTypeFromGLSL(getShaderType()));
961 ppc.usesDerivatives = usesDerivatives();
962
963 if (!WrapMain(*this, idGen, *root))
964 {
965 return false;
966 }
967
968 // Remove declarations of inactive shader interface variables so glslang wrapper doesn't need to
969 // replace them. Note: this is done before extracting samplers from structs, as removing such
970 // inactive samplers is not yet supported. Note also that currently, CollectVariables marks
971 // every field of an active uniform that's of struct type as active, i.e. no extracted sampler
972 // is inactive.
973 if (!RemoveInactiveInterfaceVariables(this, root, &getSymbolTable(), getAttributes(),
974 getInputVaryings(), getOutputVariables(), getUniforms(),
975 getInterfaceBlocks(), false))
976 {
977 return false;
978 }
979
980 // Write out default uniforms into a uniform block assigned to a specific set/binding.
981 int aggregateTypesUsedForUniforms = 0;
982 int atomicCounterCount = 0;
983 for (const auto &uniform : getUniforms())
984 {
985 if (uniform.isStruct() || uniform.isArrayOfArrays())
986 {
987 ++aggregateTypesUsedForUniforms;
988 }
989
990 if (uniform.active && gl::IsAtomicCounterType(uniform.type))
991 {
992 ++atomicCounterCount;
993 }
994 }
995
996 // If there are any function calls that take array-of-array of opaque uniform parameters, or
997 // other opaque uniforms that need special handling in Vulkan, such as atomic counters,
998 // monomorphize the functions by removing said parameters and replacing them in the function
999 // body with the call arguments.
1000 //
1001 // This has a few benefits:
1002 //
1003 // - It dramatically simplifies future transformations w.r.t to samplers in structs, array of
1004 // arrays of opaque types, atomic counters etc.
1005 // - Avoids the need for shader*ArrayDynamicIndexing Vulkan features.
1006 UnsupportedFunctionArgsBitSet args{UnsupportedFunctionArgs::StructContainingSamplers,
1007 UnsupportedFunctionArgs::ArrayOfArrayOfSamplerOrImage,
1008 UnsupportedFunctionArgs::AtomicCounter,
1009 UnsupportedFunctionArgs::SamplerCubeEmulation,
1010 UnsupportedFunctionArgs::Image};
1011 if (!MonomorphizeUnsupportedFunctions(this, root, &getSymbolTable(), compileOptions, args))
1012 {
1013 return false;
1014 }
1015
1016 if (aggregateTypesUsedForUniforms > 0)
1017 {
1018 if (!NameEmbeddedStructUniformsMetal(this, root, &symbolTable))
1019 {
1020 return false;
1021 }
1022
1023 if (!SeparateStructFromUniformDeclarations(this, root, &getSymbolTable()))
1024 {
1025 return false;
1026 }
1027
1028 int removedUniformsCount;
1029
1030 if (!RewriteStructSamplers(this, root, &getSymbolTable(), &removedUniformsCount))
1031 {
1032 return false;
1033 }
1034 }
1035
1036 // Replace array of array of opaque uniforms with a flattened array. This is run after
1037 // MonomorphizeUnsupportedFunctions and RewriteStructSamplers so that it's not possible for an
1038 // array of array of opaque type to be partially subscripted and passed to a function.
1039 if (!RewriteArrayOfArrayOfOpaqueUniforms(this, root, &getSymbolTable()))
1040 {
1041 return false;
1042 }
1043
1044 if (compileOptions.emulateSeamfulCubeMapSampling)
1045 {
1046 if (!RewriteCubeMapSamplersAs2DArray(this, root, &symbolTable,
1047 getShaderType() == GL_FRAGMENT_SHADER))
1048 {
1049 return false;
1050 }
1051 }
1052
1053 if (getShaderVersion() >= 300 ||
1054 IsExtensionEnabled(getExtensionBehavior(), TExtension::EXT_shader_texture_lod))
1055 {
1056 if (compileOptions.preTransformTextureCubeGradDerivatives)
1057 {
1058 if (!PreTransformTextureCubeGradDerivatives(this, root, &symbolTable,
1059 getShaderVersion()))
1060 {
1061 return false;
1062 }
1063 }
1064 }
1065
1066 if (getShaderType() == GL_COMPUTE_SHADER)
1067 {
1068 driverUniforms->addComputeDriverUniformsToShader(root, &getSymbolTable());
1069 }
1070 else
1071 {
1072 driverUniforms->addGraphicsDriverUniformsToShader(root, &getSymbolTable());
1073 }
1074
1075 if (atomicCounterCount > 0)
1076 {
1077 const TIntermTyped *acbBufferOffsets = driverUniforms->getAcbBufferOffsets();
1078 if (!RewriteAtomicCounters(this, root, &symbolTable, acbBufferOffsets, nullptr))
1079 {
1080 return false;
1081 }
1082 }
1083 else if (getShaderVersion() >= 310)
1084 {
1085 // Vulkan doesn't support Atomic Storage as a Storage Class, but we've seen
1086 // cases where builtins are using it even with no active atomic counters.
1087 // This pass simply removes those builtins in that scenario.
1088 if (!RemoveAtomicCounterBuiltins(this, root))
1089 {
1090 return false;
1091 }
1092 }
1093
1094 if (getShaderType() != GL_COMPUTE_SHADER)
1095 {
1096 if (!ReplaceGLDepthRangeWithDriverUniform(this, root, driverUniforms, &getSymbolTable()))
1097 {
1098 return false;
1099 }
1100 }
1101
1102 {
1103 bool usesInstanceId = false;
1104 bool usesVertexId = false;
1105 for (const ShaderVariable &var : mAttributes)
1106 {
1107 if (var.isBuiltIn())
1108 {
1109 if (var.name == "gl_InstanceID")
1110 {
1111 usesInstanceId = true;
1112 }
1113 if (var.name == "gl_VertexID")
1114 {
1115 usesVertexId = true;
1116 }
1117 }
1118 }
1119
1120 if (usesInstanceId)
1121 {
1122 root->insertChildNodes(
1123 FindMainIndex(root),
1124 TIntermSequence{new TIntermDeclaration{BuiltInVariable::gl_InstanceID()}});
1125 }
1126 if (usesVertexId)
1127 {
1128 AddBuiltInDeclaration(*root, symbolTable, *BuiltInVariable::gl_VertexID());
1129 }
1130 }
1131 SymbolEnv symbolEnv(*this, *root);
1132
1133 bool usesSampleMask = false;
1134 if (getShaderType() == GL_FRAGMENT_SHADER)
1135 {
1136 bool usesPointCoord = false;
1137 bool usesFragCoord = false;
1138 bool usesFrontFacing = false;
1139 bool usesSampleID = false;
1140 bool usesSamplePosition = false;
1141 bool usesSampleMaskIn = false;
1142 for (const ShaderVariable &inputVarying : mInputVaryings)
1143 {
1144 if (inputVarying.isBuiltIn())
1145 {
1146 if (inputVarying.name == "gl_PointCoord")
1147 {
1148 usesPointCoord = true;
1149 }
1150 else if (inputVarying.name == "gl_FragCoord")
1151 {
1152 usesFragCoord = true;
1153 }
1154 else if (inputVarying.name == "gl_FrontFacing")
1155 {
1156 usesFrontFacing = true;
1157 }
1158 else if (inputVarying.name == "gl_SampleID")
1159 {
1160 usesSampleID = true;
1161 }
1162 else if (inputVarying.name == "gl_SamplePosition")
1163 {
1164 usesSampleID = true;
1165 usesSamplePosition = true;
1166 }
1167 else if (inputVarying.name == "gl_SampleMaskIn")
1168 {
1169 usesSampleMaskIn = true;
1170 }
1171 }
1172 }
1173
1174 bool usesFragColor = false;
1175 bool usesFragData = false;
1176 bool usesFragDepth = false;
1177 bool usesFragDepthEXT = false;
1178 bool usesSecondaryFragColorEXT = false;
1179 bool usesSecondaryFragDataEXT = false;
1180 for (const ShaderVariable &outputVarying : mOutputVariables)
1181 {
1182 if (outputVarying.isBuiltIn())
1183 {
1184 if (outputVarying.name == "gl_FragColor")
1185 {
1186 usesFragColor = true;
1187 }
1188 else if (outputVarying.name == "gl_FragData")
1189 {
1190 usesFragData = true;
1191 }
1192 else if (outputVarying.name == "gl_FragDepth")
1193 {
1194 usesFragDepth = true;
1195 }
1196 else if (outputVarying.name == "gl_FragDepthEXT")
1197 {
1198 usesFragDepthEXT = true;
1199 }
1200 else if (outputVarying.name == "gl_SecondaryFragColorEXT")
1201 {
1202 usesSecondaryFragColorEXT = true;
1203 }
1204 else if (outputVarying.name == "gl_SecondaryFragDataEXT")
1205 {
1206 usesSecondaryFragDataEXT = true;
1207 }
1208 else if (outputVarying.name == "gl_SampleMask")
1209 {
1210 usesSampleMask = true;
1211 }
1212 }
1213 }
1214
1215 // A shader may assign values to either the set of gl_FragColor and gl_SecondaryFragColorEXT
1216 // or the set of gl_FragData and gl_SecondaryFragDataEXT, but not both.
1217 ASSERT((!usesFragColor && !usesSecondaryFragColorEXT) ||
1218 (!usesFragData && !usesSecondaryFragDataEXT));
1219
1220 if (usesFragColor)
1221 {
1222 AddFragColorDeclaration(*root, symbolTable, *BuiltInVariable::gl_FragColor());
1223 }
1224 else if (usesFragData)
1225 {
1226 if (!AddFragDataDeclaration(*this, *root, usesSecondaryFragDataEXT, false))
1227 {
1228 return false;
1229 }
1230 }
1231
1232 if (usesFragDepth)
1233 {
1234 AddBuiltInDeclaration(*root, symbolTable, *BuiltInVariable::gl_FragDepth());
1235 }
1236 else if (usesFragDepthEXT)
1237 {
1238 AddFragDepthEXTDeclaration(*this, *root, symbolTable);
1239 }
1240
1241 if (usesSecondaryFragColorEXT)
1242 {
1243 AddFragColorDeclaration(*root, symbolTable,
1244 *BuiltInVariable::gl_SecondaryFragColorEXT());
1245 }
1246 else if (usesSecondaryFragDataEXT)
1247 {
1248 if (!AddFragDataDeclaration(*this, *root, usesSecondaryFragDataEXT, true))
1249 {
1250 return false;
1251 }
1252 }
1253
1254 bool usesSampleInterpolation = false;
1255 bool usesSampleInterpolant = false;
1256 if ((getShaderVersion() >= 320 ||
1257 IsExtensionEnabled(getExtensionBehavior(),
1258 TExtension::OES_shader_multisample_interpolation)) &&
1259 !RewriteInterpolants(*this, *root, symbolTable, driverUniforms,
1260 &usesSampleInterpolation, &usesSampleInterpolant))
1261 {
1262 return false;
1263 }
1264
1265 if (usesSampleID || (usesSampleMaskIn && usesSampleInterpolation) || usesSampleInterpolant)
1266 {
1267 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_SampleID());
1268 }
1269
1270 if (usesSamplePosition)
1271 {
1272 if (!AddSamplePositionDeclaration(*this, *root, symbolTable, driverUniforms))
1273 {
1274 return false;
1275 }
1276 }
1277
1278 if (usesSampleMaskIn)
1279 {
1280 if (!AddSampleMaskInDeclaration(*this, *root, symbolTable, driverUniforms,
1281 usesSampleID || usesSampleInterpolation))
1282 {
1283 return false;
1284 }
1285 }
1286
1287 if (usesPointCoord)
1288 {
1289 TIntermTyped *flipNegXY =
1290 driverUniforms->getNegFlipXY(&getSymbolTable(), DriverUniformFlip::Fragment);
1291 TIntermConstantUnion *pivot = CreateFloatNode(0.5f, EbpMedium);
1292 if (!FlipBuiltinVariable(this, root, GetMainSequence(root), flipNegXY,
1293 &getSymbolTable(), BuiltInVariable::gl_PointCoord(),
1294 kFlippedPointCoordName, pivot))
1295 {
1296 return false;
1297 }
1298 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_PointCoord());
1299 }
1300
1301 if (usesFragCoord || compileOptions.emulateAlphaToCoverage ||
1302 compileOptions.metal.generateShareableShaders)
1303 {
1304 if (!InsertFragCoordCorrection(this, compileOptions, root, GetMainSequence(root),
1305 &getSymbolTable(), driverUniforms))
1306 {
1307 return false;
1308 }
1309 const TVariable *fragCoord = static_cast<const TVariable *>(
1310 getSymbolTable().findBuiltIn(ImmutableString("gl_FragCoord"), getShaderVersion()));
1311 DeclareRightBeforeMain(*root, *fragCoord);
1312 }
1313
1314 if (!RewriteDfdy(this, root, &getSymbolTable(), getShaderVersion(), specConst,
1315 driverUniforms))
1316 {
1317 return false;
1318 }
1319
1320 if (getClipDistanceArraySize())
1321 {
1322 if (!EmulateClipDistanceVaryings(this, root, &getSymbolTable(), getShaderType()))
1323 {
1324 return false;
1325 }
1326 }
1327
1328 if (usesFrontFacing)
1329 {
1330 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_FrontFacing());
1331 }
1332
1333 bool usesNumSamples = false;
1334 for (const ShaderVariable &uniform : mUniforms)
1335 {
1336 if (uniform.name == "gl_NumSamples")
1337 {
1338 usesNumSamples = true;
1339 break;
1340 }
1341 }
1342
1343 if (usesNumSamples)
1344 {
1345 if (!AddNumSamplesDeclaration(*this, *root, symbolTable))
1346 {
1347 return false;
1348 }
1349 }
1350 }
1351 else if (getShaderType() == GL_VERTEX_SHADER)
1352 {
1353 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_Position());
1354
1355 if (FindSymbolNode(root, BuiltInVariable::gl_PointSize()->name()))
1356 {
1357 const TVariable *pointSize = static_cast<const TVariable *>(
1358 getSymbolTable().findBuiltIn(ImmutableString("gl_PointSize"), getShaderVersion()));
1359 DeclareRightBeforeMain(*root, *pointSize);
1360 }
1361
1362 // Append a macro for transform feedback substitution prior to modifying depth.
1363 if (!AppendVertexShaderTransformFeedbackOutputToMain(*this, symbolEnv, *root))
1364 {
1365 return false;
1366 }
1367
1368 if (getClipDistanceArraySize())
1369 {
1370 if (!ZeroDisabledClipDistanceAssignments(this, root, &getSymbolTable(), getShaderType(),
1371 driverUniforms->getClipDistancesEnabled()))
1372 {
1373 return false;
1374 }
1375
1376 if (IsExtensionEnabled(getExtensionBehavior(), TExtension::ANGLE_clip_cull_distance) &&
1377 !EmulateClipDistanceVaryings(this, root, &getSymbolTable(), getShaderType()))
1378 {
1379 return false;
1380 }
1381 }
1382
1383 if (!transformDepthBeforeCorrection(root, driverUniforms))
1384 {
1385 return false;
1386 }
1387
1388 if (!appendVertexShaderDepthCorrectionToMain(root, driverUniforms))
1389 {
1390 return false;
1391 }
1392 }
1393
1394 if (getShaderType() == GL_VERTEX_SHADER)
1395 {
1396 TIntermTyped *flipNegY =
1397 driverUniforms->getFlipXY(&getSymbolTable(), DriverUniformFlip::PreFragment);
1398 flipNegY = (new TIntermSwizzle(flipNegY, {1}))->fold(nullptr);
1399
1400 if (!AppendVertexShaderPositionYCorrectionToMain(this, root, &getSymbolTable(), flipNegY))
1401 {
1402 return false;
1403 }
1404 if (!insertRasterizationDiscardLogic(*root))
1405 {
1406 return false;
1407 }
1408 }
1409 else if (getShaderType() == GL_FRAGMENT_SHADER)
1410 {
1411 mValidateASTOptions.validateVariableReferences = false;
1412 if (!AddSampleMaskDeclaration(*this, *root, symbolTable, driverUniforms,
1413 compileOptions.emulateAlphaToCoverage ||
1414 compileOptions.metal.generateShareableShaders,
1415 usesSampleMask))
1416 {
1417 return false;
1418 }
1419 }
1420
1421 if (!validateAST(root))
1422 {
1423 return false;
1424 }
1425
1426 // This is the largest size required to pass all the tests in
1427 // (dEQP-GLES3.functional.shaders.large_constant_arrays)
1428 // This value could in principle be smaller.
1429 const size_t hoistThresholdSize = 256;
1430 if (!HoistConstants(*this, *root, idGen, hoistThresholdSize))
1431 {
1432 return false;
1433 }
1434
1435 if (!ConvertUnsupportedConstructorsToFunctionCalls(*this, *root))
1436 {
1437 return false;
1438 }
1439
1440 const bool needsExplicitBoolCasts = compileOptions.addExplicitBoolCasts;
1441 if (!AddExplicitTypeCasts(*this, *root, symbolEnv, needsExplicitBoolCasts))
1442 {
1443 return false;
1444 }
1445
1446 if (!SeparateCompoundStructDeclarations(*this, idGen, *root))
1447 {
1448 return false;
1449 }
1450
1451 if (!SeparateCompoundExpressions(*this, symbolEnv, idGen, *root))
1452 {
1453 return false;
1454 }
1455
1456 if (!ReduceInterfaceBlocks(*this, *root, idGen))
1457 {
1458 return false;
1459 }
1460
1461 // The RewritePipelines phase leaves the tree in an inconsistent state by inserting
1462 // references to structures like "ANGLE_TextureEnv<metal::texture2d<float>>" which are
1463 // defined in text (in ProgramPrelude), outside of the knowledge of the AST.
1464 mValidateASTOptions.validateStructUsage = false;
1465 // The RewritePipelines phase also generates incoming arguments to synthesized
1466 // functions that use are missing qualifiers - for example, angleUniforms isn't marked
1467 // as an incoming argument.
1468 mValidateASTOptions.validateQualifiers = false;
1469
1470 PipelineStructs pipelineStructs;
1471 if (!RewritePipelines(*this, *root, getInputVaryings(), getOutputVaryings(), idGen,
1472 *driverUniforms, symbolEnv, pipelineStructs))
1473 {
1474 return false;
1475 }
1476 if (getShaderType() == GL_VERTEX_SHADER)
1477 {
1478 // This has to happen after RewritePipelines.
1479 if (!IntroduceVertexAndInstanceIndex(*this, *root))
1480 {
1481 return false;
1482 }
1483 }
1484
1485 if (!RewriteCaseDeclarations(*this, *root))
1486 {
1487 return false;
1488 }
1489
1490 if (!RewriteUnaddressableReferences(*this, *root, symbolEnv))
1491 {
1492 return false;
1493 }
1494
1495 if (!RewriteOutArgs(*this, *root, symbolEnv))
1496 {
1497 return false;
1498 }
1499 if (!FixTypeConstructors(*this, symbolEnv, *root))
1500 {
1501 return false;
1502 }
1503 if (!ToposortStructs(*this, symbolEnv, *root, ppc))
1504 {
1505 return false;
1506 }
1507 if (!EmitMetal(*this, *root, idGen, pipelineStructs, symbolEnv, ppc, compileOptions))
1508 {
1509 return false;
1510 }
1511
1512 ASSERT(validateAST(root));
1513
1514 return true;
1515 }
1516
translate(TIntermBlock * root,const ShCompileOptions & compileOptions,PerformanceDiagnostics * perfDiagnostics)1517 bool TranslatorMSL::translate(TIntermBlock *root,
1518 const ShCompileOptions &compileOptions,
1519 PerformanceDiagnostics *perfDiagnostics)
1520 {
1521 if (!root)
1522 {
1523 return false;
1524 }
1525
1526 // TODO: refactor the code in TranslatorMSL to not issue raw function calls.
1527 // http://anglebug.com/6059#c2
1528 mValidateASTOptions.validateNoRawFunctionCalls = false;
1529 // A validation error is generated in this backend due to bool uniforms.
1530 mValidateASTOptions.validatePrecision = false;
1531
1532 TInfoSinkBase &sink = getInfoSink().obj;
1533 SpecConst specConst(&getSymbolTable(), compileOptions, getShaderType());
1534 DriverUniformMetal driverUniforms(DriverUniformMode::Structure);
1535 if (!translateImpl(sink, root, compileOptions, perfDiagnostics, &specConst, &driverUniforms))
1536 {
1537 return false;
1538 }
1539
1540 return true;
1541 }
shouldFlattenPragmaStdglInvariantAll()1542 bool TranslatorMSL::shouldFlattenPragmaStdglInvariantAll()
1543 {
1544 // Not neccesary for MSL transformation.
1545 return false;
1546 }
1547
1548 } // namespace sh
1549