1 //
2 // Copyright 2020 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 #include "compiler/translator/msl/TranslatorMSL.h"
8
9 #include "angle_gl.h"
10 #include "common/utilities.h"
11 #include "compiler/translator/ImmutableStringBuilder.h"
12 #include "compiler/translator/StaticType.h"
13 #include "compiler/translator/msl/AstHelpers.h"
14 #include "compiler/translator/msl/DriverUniformMetal.h"
15 #include "compiler/translator/msl/EmitMetal.h"
16 #include "compiler/translator/msl/Name.h"
17 #include "compiler/translator/msl/RewritePipelines.h"
18 #include "compiler/translator/msl/SymbolEnv.h"
19 #include "compiler/translator/msl/ToposortStructs.h"
20 #include "compiler/translator/msl/UtilsMSL.h"
21 #include "compiler/translator/tree_ops/InitializeVariables.h"
22 #include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h"
23 #include "compiler/translator/tree_ops/RemoveAtomicCounterBuiltins.h"
24 #include "compiler/translator/tree_ops/RemoveInactiveInterfaceVariables.h"
25 #include "compiler/translator/tree_ops/RewriteArrayOfArrayOfOpaqueUniforms.h"
26 #include "compiler/translator/tree_ops/RewriteAtomicCounters.h"
27 #include "compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h"
28 #include "compiler/translator/tree_ops/RewriteDfdy.h"
29 #include "compiler/translator/tree_ops/RewriteStructSamplers.h"
30 #include "compiler/translator/tree_ops/SeparateStructFromUniformDeclarations.h"
31 #include "compiler/translator/tree_ops/msl/AddExplicitTypeCasts.h"
32 #include "compiler/translator/tree_ops/msl/ConvertUnsupportedConstructorsToFunctionCalls.h"
33 #include "compiler/translator/tree_ops/msl/FixTypeConstructors.h"
34 #include "compiler/translator/tree_ops/msl/HoistConstants.h"
35 #include "compiler/translator/tree_ops/msl/IntroduceVertexIndexID.h"
36 #include "compiler/translator/tree_ops/msl/NameEmbeddedUniformStructsMetal.h"
37 #include "compiler/translator/tree_ops/msl/ReduceInterfaceBlocks.h"
38 #include "compiler/translator/tree_ops/msl/RewriteCaseDeclarations.h"
39 #include "compiler/translator/tree_ops/msl/RewriteInterpolants.h"
40 #include "compiler/translator/tree_ops/msl/RewriteOutArgs.h"
41 #include "compiler/translator/tree_ops/msl/RewriteUnaddressableReferences.h"
42 #include "compiler/translator/tree_ops/msl/SeparateCompoundExpressions.h"
43 #include "compiler/translator/tree_ops/msl/SeparateCompoundStructDeclarations.h"
44 #include "compiler/translator/tree_ops/msl/WrapMain.h"
45 #include "compiler/translator/tree_util/BuiltIn.h"
46 #include "compiler/translator/tree_util/DriverUniform.h"
47 #include "compiler/translator/tree_util/FindFunction.h"
48 #include "compiler/translator/tree_util/FindMain.h"
49 #include "compiler/translator/tree_util/FindSymbolNode.h"
50 #include "compiler/translator/tree_util/IntermNode_util.h"
51 #include "compiler/translator/tree_util/ReplaceClipCullDistanceVariable.h"
52 #include "compiler/translator/tree_util/ReplaceVariable.h"
53 #include "compiler/translator/tree_util/RunAtTheBeginningOfShader.h"
54 #include "compiler/translator/tree_util/RunAtTheEndOfShader.h"
55 #include "compiler/translator/tree_util/SpecializationConstant.h"
56 #include "compiler/translator/util.h"
57
58 namespace sh
59 {
60
61 namespace
62 {
63
64 constexpr Name kFlippedPointCoordName("flippedPointCoord", SymbolType::AngleInternal);
65 constexpr Name kFlippedFragCoordName("flippedFragCoord", SymbolType::AngleInternal);
66
67 constexpr const TVariable kgl_VertexIDMetal(BuiltInId::gl_VertexID,
68 ImmutableString("gl_VertexID"),
69 SymbolType::BuiltIn,
70 TExtension::UNDEFINED,
71 StaticType::Get<EbtUInt, EbpHigh, EvqVertexID, 1, 1>());
72
73 class DeclareStructTypesTraverser : public TIntermTraverser
74 {
75 public:
76 explicit DeclareStructTypesTraverser(TOutputMSL *outputMSL)
77 : TIntermTraverser(true, false, false), mOutputMSL(outputMSL)
78 {}
79
80 bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
81 {
82 ASSERT(visit == PreVisit);
83 if (!mInGlobalScope)
84 {
85 return false;
86 }
87
88 const TIntermSequence &sequence = *(node->getSequence());
89 TIntermTyped *declarator = sequence.front()->getAsTyped();
90 const TType &type = declarator->getType();
91
92 if (type.isStructSpecifier())
93 {
94 const TStructure *structure = type.getStruct();
95
96 // Embedded structs should be parsed away by now.
97 ASSERT(structure->symbolType() != SymbolType::Empty);
98 // outputMSL->writeStructType(structure);
99
100 TIntermSymbol *symbolNode = declarator->getAsSymbolNode();
101 if (symbolNode && symbolNode->variable().symbolType() == SymbolType::Empty)
102 {
103 // Remove the struct specifier declaration from the tree so it isn't parsed again.
104 TIntermSequence emptyReplacement;
105 mMultiReplacements.emplace_back(getParentNode()->getAsBlock(), node,
106 std::move(emptyReplacement));
107 }
108 }
109 // TODO: REMOVE, used to suppress the 'unused' variable warning
110 mOutputMSL = nullptr;
111
112 return false;
113 }
114
115 private:
116 TOutputMSL *mOutputMSL;
117 };
118
119 class DeclareDefaultUniformsTraverser : public TIntermTraverser
120 {
121 public:
122 DeclareDefaultUniformsTraverser(TInfoSinkBase *sink,
123 ShHashFunction64 hashFunction,
124 NameMap *nameMap)
125 : TIntermTraverser(true, true, true),
126 mSink(sink),
127 mHashFunction(hashFunction),
128 mNameMap(nameMap),
129 mInDefaultUniform(false)
130 {}
131
132 bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
133 {
134 const TIntermSequence &sequence = *(node->getSequence());
135
136 // TODO(jmadill): Compound declarations.
137 ASSERT(sequence.size() == 1);
138
139 TIntermTyped *variable = sequence.front()->getAsTyped();
140 const TType &type = variable->getType();
141 bool isUniform = type.getQualifier() == EvqUniform && !type.isInterfaceBlock() &&
142 !IsOpaqueType(type.getBasicType());
143
144 if (visit == PreVisit)
145 {
146 if (isUniform)
147 {
148 (*mSink) << " " << GetTypeName(type, mHashFunction, mNameMap) << " ";
149 mInDefaultUniform = true;
150 }
151 }
152 else if (visit == InVisit)
153 {
154 mInDefaultUniform = isUniform;
155 }
156 else if (visit == PostVisit)
157 {
158 if (isUniform)
159 {
160 (*mSink) << ";\n";
161
162 // Remove the uniform declaration from the tree so it isn't parsed again.
163 TIntermSequence emptyReplacement;
164 mMultiReplacements.emplace_back(getParentNode()->getAsBlock(), node,
165 std::move(emptyReplacement));
166 }
167
168 mInDefaultUniform = false;
169 }
170 return true;
171 }
172
173 void visitSymbol(TIntermSymbol *symbol) override
174 {
175 if (mInDefaultUniform)
176 {
177 const ImmutableString &name = symbol->variable().name();
178 ASSERT(!gl::IsBuiltInName(name.data()));
179 (*mSink) << HashName(&symbol->variable(), mHashFunction, mNameMap)
180 << ArrayString(symbol->getType());
181 }
182 }
183
184 private:
185 TInfoSinkBase *mSink;
186 ShHashFunction64 mHashFunction;
187 NameMap *mNameMap;
188 bool mInDefaultUniform;
189 };
190
191 // Declares a new variable to replace gl_DepthRange; its values are fed from a driver uniform.
192 [[nodiscard]] bool ReplaceGLDepthRangeWithDriverUniform(TCompiler *compiler,
193 TIntermBlock *root,
194 const DriverUniformMetal *driverUniforms,
195 TSymbolTable *symbolTable)
196 {
197 // Create a symbol reference to "gl_DepthRange"
198 const TVariable *depthRangeVar = static_cast<const TVariable *>(
199 symbolTable->findBuiltIn(ImmutableString("gl_DepthRange"), 0));
200
201 // ANGLEUniforms.depthRange
202 TIntermTyped *angleEmulatedDepthRangeRef = driverUniforms->getDepthRange();
203
204 // Use this variable instead of gl_DepthRange everywhere.
205 return ReplaceVariableWithTyped(compiler, root, depthRangeVar, angleEmulatedDepthRangeRef);
206 }
207
208 TIntermSequence *GetMainSequence(TIntermBlock *root)
209 {
210 TIntermFunctionDefinition *main = FindMain(root);
211 return main->getBody()->getSequence();
212 }
213
214 // Replaces a builtin variable with a version whose X and Y coordinates are flipped around a pivot.
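// In GLSL terms, the generated code is roughly:
//   flippedBuiltin = builtin;
//   flippedBuiltin.xy = (builtin.xy - pivot) * flipXY + pivot;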
215 [[nodiscard]] bool FlipBuiltinVariable(TCompiler *compiler,
216 TIntermBlock *root,
217 TIntermSequence *insertSequence,
218 TIntermTyped *flipXY,
219 TSymbolTable *symbolTable,
220 const TVariable *builtin,
221 const Name &flippedVariableName,
222 TIntermTyped *pivot)
223 {
224 // Create a symbol reference to 'builtin'.
225 TIntermSymbol *builtinRef = new TIntermSymbol(builtin);
226
227 // Create a swizzle to "builtin.xy"
228 TVector<int> swizzleOffsetXY = {0, 1};
229 TIntermSwizzle *builtinXY = new TIntermSwizzle(builtinRef, swizzleOffsetXY);
230
231 // Create a symbol reference to our new variable that will hold the modified builtin.
232 const TType *type =
233 StaticType::GetForVec<EbtFloat, EbpHigh>(EvqGlobal, builtin->getType().getNominalSize());
234 TVariable *replacementVar =
235 new TVariable(symbolTable, flippedVariableName.rawName(), type, SymbolType::AngleInternal);
236 DeclareGlobalVariable(root, replacementVar);
237 TIntermSymbol *flippedBuiltinRef = new TIntermSymbol(replacementVar);
238
239 // Use this new variable instead of 'builtin' everywhere.
240 if (!ReplaceVariable(compiler, root, builtin, replacementVar))
241 {
242 return false;
243 }
244
245 // Create the expression "(builtin.xy - pivot) * flipXY + pivot"
246 TIntermBinary *removePivot = new TIntermBinary(EOpSub, builtinXY, pivot);
247 TIntermBinary *inverseXY = new TIntermBinary(EOpMul, removePivot, flipXY);
248 TIntermBinary *plusPivot = new TIntermBinary(EOpAdd, inverseXY, pivot->deepCopy());
249
250 // Create the corrected variable and copy the value of the original builtin.
251 TIntermSequence sequence;
252 sequence.push_back(builtinRef->deepCopy());
253 TIntermAggregate *aggregate =
254 TIntermAggregate::CreateConstructor(builtin->getType(), &sequence);
255 TIntermBinary *assignment = new TIntermBinary(EOpAssign, flippedBuiltinRef, aggregate);
256
257 // Create an assignment to the replaced variable's .xy.
258 TIntermSwizzle *correctedXY =
259 new TIntermSwizzle(flippedBuiltinRef->deepCopy(), swizzleOffsetXY);
260 TIntermBinary *assignToY = new TIntermBinary(EOpAssign, correctedXY, plusPivot);
261
262 // Add these assignments at the beginning of the main function
263 insertSequence->insert(insertSequence->begin(), assignToY);
264 insertSequence->insert(insertSequence->begin(), assignment);
265
266 return compiler->validateAST(root);
267 }
268
269 [[nodiscard]] bool InsertFragCoordCorrection(TCompiler *compiler,
270 const ShCompileOptions &compileOptions,
271 TIntermBlock *root,
272 TIntermSequence *insertSequence,
273 TSymbolTable *symbolTable,
274 const DriverUniformMetal *driverUniforms)
275 {
276 TIntermTyped *flipXY = driverUniforms->getFlipXY(symbolTable, DriverUniformFlip::Fragment);
277 TIntermTyped *pivot = driverUniforms->getHalfRenderArea();
278
279 const TVariable *fragCoord = static_cast<const TVariable *>(
280 symbolTable->findBuiltIn(ImmutableString("gl_FragCoord"), compiler->getShaderVersion()));
281 return FlipBuiltinVariable(compiler, root, insertSequence, flipXY, symbolTable, fragCoord,
282 kFlippedFragCoordName, pivot);
283 }
284
285 void DeclareRightBeforeMain(TIntermBlock &root, const TVariable &var)
286 {
287 root.insertChildNodes(FindMainIndex(&root), {new TIntermDeclaration{&var}});
288 }
289
290 void AddFragColorDeclaration(TIntermBlock &root, TSymbolTable &symbolTable, const TVariable &var)
291 {
292 root.insertChildNodes(FindMainIndex(&root), TIntermSequence{new TIntermDeclaration{&var}});
293 }
294
295 void AddFragDepthDeclaration(TIntermBlock &root, TSymbolTable &symbolTable)
296 {
297 // Check if the variable has already been declared.
298 const TIntermSymbol *fragDepthBuiltIn = new TIntermSymbol(BuiltInVariable::gl_FragDepth());
299 const TIntermSymbol *fragDepthSymbol = FindSymbolNode(&root, ImmutableString("gl_FragDepth"));
300 if (fragDepthSymbol && fragDepthSymbol->uniqueId() != fragDepthBuiltIn->uniqueId())
301 {
302 return;
303 }
304 root.insertChildNodes(FindMainIndex(&root),
305 TIntermSequence{new TIntermDeclaration{BuiltInVariable::gl_FragDepth()}});
306 }
307
308 void AddFragDepthEXTDeclaration(TCompiler &compiler, TIntermBlock &root, TSymbolTable &symbolTable)
309 {
310 const TIntermSymbol *glFragDepthExt = FindSymbolNode(&root, ImmutableString("gl_FragDepthEXT"));
311 ASSERT(glFragDepthExt);
312 // Replace gl_FragDepthEXT with the gl_FragDepth built-in.
313 if (!ReplaceVariable(&compiler, &root, &(glFragDepthExt->variable()),
314 BuiltInVariable::gl_FragDepth()))
315 {
316 return;
317 }
318 AddFragDepthDeclaration(root, symbolTable);
319 }
320
321 [[nodiscard]] bool AddNumSamplesDeclaration(TCompiler &compiler,
322 TIntermBlock &root,
323 TSymbolTable &symbolTable)
324 {
325 const TVariable *glNumSamples = BuiltInVariable::gl_NumSamples();
326 DeclareRightBeforeMain(root, *glNumSamples);
327
328 // gl_NumSamples = metal::get_num_samples();
329 TIntermBinary *assignment = new TIntermBinary(
330 TOperator::EOpAssign, new TIntermSymbol(glNumSamples),
331 CreateBuiltInFunctionCallNode("numSamples", {}, symbolTable, kESSLInternalBackendBuiltIns));
332 return RunAtTheBeginningOfShader(&compiler, &root, assignment);
333 }
334
335 [[nodiscard]] bool AddSamplePositionDeclaration(TCompiler &compiler,
336 TIntermBlock &root,
337 TSymbolTable &symbolTable,
338 const DriverUniformMetal *driverUniforms)
339 {
340 const TVariable *glSamplePosition = BuiltInVariable::gl_SamplePosition();
341 DeclareRightBeforeMain(root, *glSamplePosition);
342
343 // When rendering to a default FBO, gl_SamplePosition should
344 // be Y-flipped to match the actual sample location
345 // gl_SamplePosition = metal::get_sample_position(uint(gl_SampleID));
346 // gl_SamplePosition -= 0.5;
347 // gl_SamplePosition *= flipXY;
348 // gl_SamplePosition += 0.5;
349 TIntermBlock *block = new TIntermBlock;
350 block->appendStatement(new TIntermBinary(
351 TOperator::EOpAssign, new TIntermSymbol(glSamplePosition),
352 CreateBuiltInFunctionCallNode("samplePosition",
353 {TIntermAggregate::CreateConstructor(
354 *StaticType::GetBasic<EbtUInt, EbpHigh>(),
355 {new TIntermSymbol(BuiltInVariable::gl_SampleID())})},
356 symbolTable, kESSLInternalBackendBuiltIns)));
357 block->appendStatement(new TIntermBinary(TOperator::EOpSubAssign,
358 new TIntermSymbol(glSamplePosition),
359 CreateFloatNode(0.5f, EbpHigh)));
360 block->appendStatement(
361 new TIntermBinary(EOpMulAssign, new TIntermSymbol(glSamplePosition),
362 driverUniforms->getFlipXY(&symbolTable, DriverUniformFlip::Fragment)));
363 block->appendStatement(new TIntermBinary(TOperator::EOpAddAssign,
364 new TIntermSymbol(glSamplePosition),
365 CreateFloatNode(0.5f, EbpHigh)));
366 return RunAtTheBeginningOfShader(&compiler, &root, block);
367 }
368
369 [[nodiscard]] bool AddSampleMaskInDeclaration(TCompiler &compiler,
370 TIntermBlock &root,
371 TSymbolTable &symbolTable,
372 const DriverUniformMetal *driverUniforms,
373 bool perSampleShading)
374 {
375 // in highp int gl_SampleMaskIn[1]
376 const TVariable *glSampleMaskIn = static_cast<const TVariable *>(
377 symbolTable.findBuiltIn(ImmutableString("gl_SampleMaskIn"), compiler.getShaderVersion()));
378 DeclareRightBeforeMain(root, *glSampleMaskIn);
379
380 // Reference to gl_SampleMaskIn[0]
381 TIntermBinary *glSampleMaskIn0 =
382 new TIntermBinary(EOpIndexDirect, new TIntermSymbol(glSampleMaskIn), CreateIndexNode(0));
383
384 // When per-sample shading is active due to the use of a fragment input qualified
385 // by sample or due to the use of the gl_SampleID or gl_SamplePosition variables,
386 // only the bit for the current sample is set in gl_SampleMaskIn.
387 TIntermBlock *block = new TIntermBlock;
388 if (perSampleShading)
389 {
390 // gl_SampleMaskIn[0] = 1 << gl_SampleID;
391 block->appendStatement(new TIntermBinary(
392 EOpAssign, glSampleMaskIn0,
393 new TIntermBinary(EOpBitShiftLeft, CreateUIntNode(1),
394 new TIntermSymbol(BuiltInVariable::gl_SampleID()))));
395 }
396 else
397 {
398 // uint32_t ANGLE_metal_SampleMaskIn [[sample_mask]]
399 TVariable *angleSampleMaskIn = new TVariable(
400 &symbolTable, ImmutableString("metal_SampleMaskIn"),
401 new TType(EbtUInt, EbpHigh, EvqSampleMaskIn, 1), SymbolType::AngleInternal);
402 DeclareRightBeforeMain(root, *angleSampleMaskIn);
403
404 // gl_SampleMaskIn[0] = ANGLE_metal_SampleMaskIn;
405 block->appendStatement(
406 new TIntermBinary(EOpAssign, glSampleMaskIn0, new TIntermSymbol(angleSampleMaskIn)));
407 }
408
409 // Bits in the sample mask corresponding to covered samples
410 // that will be unset due to SAMPLE_COVERAGE or SAMPLE_MASK
411 // will not be set (section 4.1.3).
412 // if (ANGLEMultisampledRendering)
413 // {
414 // gl_SampleMaskIn[0] &= ANGLE_angleUniforms.coverageMask;
415 // }
416 TIntermBlock *coverageBlock = new TIntermBlock;
417 coverageBlock->appendStatement(new TIntermBinary(
418 EOpBitwiseAndAssign, glSampleMaskIn0->deepCopy(), driverUniforms->getCoverageMaskField()));
419
420 TVariable *sampleMaskEnabledVar = new TVariable(
421 &symbolTable, sh::ImmutableString(mtl::kMultisampledRenderingConstName),
422 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(), SymbolType::AngleInternal);
423 block->appendStatement(
424 new TIntermIfElse(new TIntermSymbol(sampleMaskEnabledVar), coverageBlock, nullptr));
425
426 return RunAtTheBeginningOfShader(&compiler, &root, block);
427 }
428
429 [[nodiscard]] bool AddSampleMaskDeclaration(TCompiler &compiler,
430 TIntermBlock &root,
431 TSymbolTable &symbolTable,
432 const DriverUniformMetal *driverUniforms,
433 bool includeEmulateAlphaToCoverage,
434 bool usesSampleMask)
435 {
436 // uint32_t ANGLE_metal_SampleMask [[sample_mask]]
437 TVariable *angleSampleMask =
438 new TVariable(&symbolTable, ImmutableString("metal_SampleMask"),
439 new TType(EbtUInt, EbpHigh, EvqSampleMask, 1), SymbolType::AngleInternal);
440 DeclareRightBeforeMain(root, *angleSampleMask);
441
442 // ANGLE_metal_SampleMask = ANGLE_angleUniforms.coverageMask;
443 TIntermBlock *block = new TIntermBlock;
444 block->appendStatement(new TIntermBinary(EOpAssign, new TIntermSymbol(angleSampleMask),
445 driverUniforms->getCoverageMaskField()));
446 if (usesSampleMask)
447 {
448 // out highp int gl_SampleMask[1];
449 const TVariable *glSampleMask = static_cast<const TVariable *>(
450 symbolTable.findBuiltIn(ImmutableString("gl_SampleMask"), compiler.getShaderVersion()));
451 DeclareRightBeforeMain(root, *glSampleMask);
452
453 // ANGLE_metal_SampleMask &= gl_SampleMask[0];
454 TIntermBinary *glSampleMask0 =
455 new TIntermBinary(EOpIndexDirect, new TIntermSymbol(glSampleMask), CreateIndexNode(0));
456 block->appendStatement(new TIntermBinary(
457 EOpBitwiseAndAssign, new TIntermSymbol(angleSampleMask), glSampleMask0));
458 }
459
460 if (includeEmulateAlphaToCoverage)
461 {
462 // Some Metal drivers ignore alpha-to-coverage state when a fragment
463 // shader writes to [[sample_mask]]. Moreover, Metal pipeline state
464 // does not support setting a global coverage mask, which would be used
465 // for emulating GL_SAMPLE_COVERAGE, so [[sample_mask]] is used instead.
466 // To support alpha-to-coverage regardless of the [[sample_mask]] usage,
467 // the former is always emulated on such drivers.
468 TIntermBlock *alphaBlock = new TIntermBlock;
469
470 // To reduce image artifacts due to regular coverage sample locations,
471 // alpha value thresholds that toggle individual samples are slightly
472 // different within 2x2 pixel blocks. Consider MSAAx4, for example.
473 // Instead of always enabling samples on evenly distributed alpha
474 // values like {51, 102, 153, 204} these thresholds may vary as follows
475 //
476 // Sample 0 Sample 1 Sample 2 Sample 3
477 // ----- ----- ----- ----- ----- ----- ----- -----
478 // | 7.5| 39.5| | 71.5|103.5| |135.5|167.5| |199.5|231.5|
479 // |----- -----| |----- -----| |----- -----| |----- -----|
480 // | 55.5| 23.5| |119.5| 87.5| |183.5|151.5| |247.5|215.5|
481 // ----- ----- ----- ----- ----- ----- ----- -----
482 // These threshold values may be expressed as
483 // 7.5 + P * 16 + 64 * sampleID
484 // where P is
485 // ((x << 1) - (y & 1)) & 3
486 // and constant values depend on the number of samples used.
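// The statements below compute P in place: p starts as gl_FragCoord.x, is shifted
// left by one, has (gl_FragCoord.y & 1) subtracted, and is finally masked with 3,
// yielding ((x << 1) - (y & 1)) & 3.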
487 TVariable *p = CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtInt, EbpHigh>());
488 TVariable *y = CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtInt, EbpHigh>());
489 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
490 p, new TIntermSwizzle(new TIntermSymbol(BuiltInVariable::gl_FragCoord()), {0})));
491 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
492 y, new TIntermSwizzle(new TIntermSymbol(BuiltInVariable::gl_FragCoord()), {1})));
493 alphaBlock->appendStatement(
494 new TIntermBinary(EOpBitShiftLeftAssign, new TIntermSymbol(p), CreateIndexNode(1)));
495 alphaBlock->appendStatement(
496 new TIntermBinary(EOpBitwiseAndAssign, new TIntermSymbol(y), CreateIndexNode(1)));
497 alphaBlock->appendStatement(
498 new TIntermBinary(EOpSubAssign, new TIntermSymbol(p), new TIntermSymbol(y)));
499 alphaBlock->appendStatement(
500 new TIntermBinary(EOpBitwiseAndAssign, new TIntermSymbol(p), CreateIndexNode(3)));
501
502 // This internal variable, defined in-text in the function constants section,
503 // will point to the alpha channel of the color zero output. Due to potential
504 // EXT_blend_func_extended usage, the exact variable may be unknown until the
505 // program is linked.
506 TVariable *alpha0 =
507 new TVariable(&symbolTable, sh::ImmutableString("_ALPHA0"),
508 StaticType::Get<EbtFloat, EbpUndefined, EvqSpecConst, 1, 1>(),
509 SymbolType::AngleInternal);
510
511 // Use metal::saturate to clamp the alpha value to [0.0, 1.0] and scale it
512 // to [0.0, 510.0] since further operations expect an integer alpha value.
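// (510 = 2 * 255; doubling the canonical [0, 255] alpha range turns the x.5
// thresholds above into whole numbers.)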
513 TVariable *alphaScaled =
514 CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtFloat, EbpHigh>());
515 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
516 alphaScaled, CreateBuiltInFunctionCallNode("saturate", {new TIntermSymbol(alpha0)},
517 symbolTable, kESSLInternalBackendBuiltIns)));
518 alphaBlock->appendStatement(new TIntermBinary(EOpMulAssign, new TIntermSymbol(alphaScaled),
519 CreateFloatNode(510.0, EbpUndefined)));
520 // int alphaMask = int(alphaScaled);
521 TVariable *alphaMask =
522 CreateTempVariable(&symbolTable, StaticType::GetBasic<EbtInt, EbpHigh>());
523 alphaBlock->appendStatement(CreateTempInitDeclarationNode(
524 alphaMask, TIntermAggregate::CreateConstructor(*StaticType::GetBasic<EbtInt, EbpHigh>(),
525 {new TIntermSymbol(alphaScaled)})));
526
527 // The next operations depend on the number of samples in the current render target.
528 TIntermBlock *switchBlock = new TIntermBlock();
529
530 auto computeNumberOfSamples = [&](int step, int bias, int scale) {
531 switchBlock->appendStatement(new TIntermBinary(
532 EOpBitShiftLeftAssign, new TIntermSymbol(p), CreateIndexNode(step)));
533 switchBlock->appendStatement(new TIntermBinary(
534 EOpAddAssign, new TIntermSymbol(alphaMask), CreateIndexNode(bias)));
535 switchBlock->appendStatement(new TIntermBinary(
536 EOpSubAssign, new TIntermSymbol(alphaMask), new TIntermSymbol(p)));
537 switchBlock->appendStatement(new TIntermBinary(
538 EOpBitShiftRightAssign, new TIntermSymbol(alphaMask), CreateIndexNode(scale)));
539 };
540
541 // MSAAx2
542 switchBlock->appendStatement(new TIntermCase(CreateIndexNode(2)));
543
544 // Canonical threshold values are
545 // 15.5 + P * 32 + 128 * sampleID
546 // With alpha values scaled to [0, 510], the number of covered samples is
547 // (alphaScaled + 256 - (31 + P * 64)) / 256
548 // which could be simplified to
549 // (alphaScaled + 225 - (P << 6)) >> 8
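// For example, with P = 0 and a fully opaque alpha (alphaScaled = 510),
// (510 + 225) >> 8 = 2, i.e. both samples are covered.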
550 computeNumberOfSamples(6, 225, 8);
551
552 // In the case of only two samples, the coverage mask is
553 // mask = (num_covered_samples * 3) >> 1
554 switchBlock->appendStatement(
555 new TIntermBinary(EOpMulAssign, new TIntermSymbol(alphaMask), CreateIndexNode(3)));
556 switchBlock->appendStatement(new TIntermBinary(
557 EOpBitShiftRightAssign, new TIntermSymbol(alphaMask), CreateIndexNode(1)));
558
559 switchBlock->appendStatement(new TIntermBranch(EOpBreak, nullptr));
560
561 // MSAAx4
562 switchBlock->appendStatement(new TIntermCase(CreateIndexNode(4)));
563
564 // Canonical threshold values are
565 // 7.5 + P * 16 + 64 * sampleID
566 // With alpha values scaled to [0, 510], the number of covered samples is
567 // (alphaScaled + 128 - (15 + P * 32)) / 128
568 // which could be simplified to
569 // (alphaScaled + 113 - (P << 5)) >> 7
570 computeNumberOfSamples(5, 113, 7);
571
572 // When two out of four samples should be covered, prioritize
573 // those that are located in the opposite corners of a pixel.
574 // 0: 0000, 1: 0001, 2: 1001, 3: 1011, 4: 1111
575 // mask = (0xFB910 >> (num_covered_samples * 4)) & 0xF
576 // The final AND may be omitted because the rasterizer output
577 // is limited to four samples.
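// For example, two covered samples yield (0xFB910 >> 8) & 0xF = 0x9 (0b1001),
// i.e. samples 0 and 3 in opposite corners of the pixel.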
578 switchBlock->appendStatement(new TIntermBinary(
579 EOpBitShiftLeftAssign, new TIntermSymbol(alphaMask), CreateIndexNode(2)));
580 switchBlock->appendStatement(
581 new TIntermBinary(EOpAssign, new TIntermSymbol(alphaMask),
582 new TIntermBinary(EOpBitShiftRight, CreateIndexNode(0xFB910),
583 new TIntermSymbol(alphaMask))));
584
585 switchBlock->appendStatement(new TIntermBranch(EOpBreak, nullptr));
586
587 // MSAAx8
588 switchBlock->appendStatement(new TIntermCase(CreateIndexNode(8)));
589
590 // Canonical threshold values are
591 // 3.5 + P * 8 + 32 * sampleID
592 // With alpha values scaled to [0, 510], the number of covered samples is
593 // (alphaScaled + 64 - (7 + P * 16)) / 64
594 // which could be simplified to
595 // (alphaScaled + 57 - (P << 4)) >> 6
596 computeNumberOfSamples(4, 57, 6);
597
598 // When eight samples are used, they could be enabled one by one
599 // mask = ~(0xFFFFFFFF << num_covered_samples)
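// For example, three covered samples yield ~(0xFFFFFFFF << 3) = 0b111,
// enabling samples 0 through 2.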
600 switchBlock->appendStatement(
601 new TIntermBinary(EOpAssign, new TIntermSymbol(alphaMask),
602 new TIntermBinary(EOpBitShiftLeft, CreateUIntNode(0xFFFFFFFFu),
603 new TIntermSymbol(alphaMask))));
604 switchBlock->appendStatement(new TIntermBinary(
605 EOpAssign, new TIntermSymbol(alphaMask),
606 new TIntermUnary(EOpBitwiseNot, new TIntermSymbol(alphaMask), nullptr)));
607
608 switchBlock->appendStatement(new TIntermBranch(EOpBreak, nullptr));
609
610 alphaBlock->getSequence()->push_back(
611 new TIntermSwitch(CreateBuiltInFunctionCallNode("numSamples", {}, symbolTable,
612 kESSLInternalBackendBuiltIns),
613 switchBlock));
614
615 alphaBlock->appendStatement(new TIntermBinary(
616 EOpBitwiseAndAssign, new TIntermSymbol(angleSampleMask), new TIntermSymbol(alphaMask)));
617
618 TIntermBlock *emulateAlphaToCoverageEnabledBlock = new TIntermBlock;
619 emulateAlphaToCoverageEnabledBlock->appendStatement(
620 new TIntermIfElse(driverUniforms->getAlphaToCoverage(), alphaBlock, nullptr));
621
622 TVariable *emulateAlphaToCoverageVar =
623 new TVariable(&symbolTable, sh::ImmutableString(mtl::kEmulateAlphaToCoverageConstName),
624 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(),
625 SymbolType::AngleInternal);
626 TIntermIfElse *useAlphaToCoverage =
627 new TIntermIfElse(new TIntermSymbol(emulateAlphaToCoverageVar),
628 emulateAlphaToCoverageEnabledBlock, nullptr);
629
630 block->appendStatement(useAlphaToCoverage);
631 }
632
633 // Sample mask assignment is guarded by the ANGLEMultisampledRendering specialization constant
634 TVariable *sampleMaskEnabledVar = new TVariable(
635 &symbolTable, sh::ImmutableString(mtl::kMultisampledRenderingConstName),
636 StaticType::Get<EbtBool, EbpUndefined, EvqSpecConst, 1, 1>(), SymbolType::AngleInternal);
637 return RunAtTheEndOfShader(
638 &compiler, &root,
639 new TIntermIfElse(new TIntermSymbol(sampleMaskEnabledVar), block, nullptr), &symbolTable);
640 }
641
642 [[nodiscard]] bool AddFragDataDeclaration(TCompiler &compiler,
643 TIntermBlock &root,
644 bool usesSecondary,
645 bool secondary)
646 {
647 TSymbolTable &symbolTable = compiler.getSymbolTable();
648 const int maxDrawBuffers = usesSecondary ? compiler.getResources().MaxDualSourceDrawBuffers
649 : compiler.getResources().MaxDrawBuffers;
650 TType *gl_FragDataType =
651 new TType(EbtFloat, EbpMedium, secondary ? EvqSecondaryFragDataEXT : EvqFragData, 4, 1);
652 std::vector<const TVariable *> glFragDataSlots;
653 TIntermSequence declareGLFragdataSequence;
654
655 // Create gl_FragData_i or gl_SecondaryFragDataEXT_i
656 const char *fragData = "gl_FragData";
657 const char *secondaryFragDataEXT = "gl_SecondaryFragDataEXT";
658 const char *name = secondary ? secondaryFragDataEXT : fragData;
659 for (int i = 0; i < maxDrawBuffers; i++)
660 {
661 ImmutableStringBuilder builder(strlen(name) + 3);
662 builder << name << "_";
663 builder.appendDecimal(i);
664 const TVariable *glFragData =
665 new TVariable(&symbolTable, builder, gl_FragDataType, SymbolType::AngleInternal,
666 TExtension::UNDEFINED);
667 glFragDataSlots.push_back(glFragData);
668 declareGLFragdataSequence.push_back(new TIntermDeclaration{glFragData});
669 }
670 root.insertChildNodes(FindMainIndex(&root), declareGLFragdataSequence);
671
672 // Create an internal gl_FragData array type, compatible with indexing syntax.
673 TType *gl_FragDataTypeArray = new TType(EbtFloat, EbpMedium, EvqGlobal, 4, 1);
674 gl_FragDataTypeArray->makeArray(maxDrawBuffers);
675 const TVariable *glFragDataGlobal = new TVariable(&symbolTable, ImmutableString(name),
676 gl_FragDataTypeArray, SymbolType::BuiltIn);
677
678 DeclareGlobalVariable(&root, glFragDataGlobal);
679 const TIntermSymbol *originalGLFragData = FindSymbolNode(&root, ImmutableString(name));
680 ASSERT(originalGLFragData);
681
682 // Replace gl_FragData[] or gl_SecondaryFragDataEXT[] with our globally defined variable
683 if (!ReplaceVariable(&compiler, &root, &(originalGLFragData->variable()), glFragDataGlobal))
684 {
685 return false;
686 }
687
688 // Assign each element of the gl_FragData array to its output variable
689 TIntermBlock *insertSequence = new TIntermBlock();
690 for (int i = 0; i < maxDrawBuffers; i++)
691 {
692 TIntermTyped *glFragDataSlot = new TIntermSymbol(glFragDataSlots[i]);
693 TIntermTyped *glFragDataGlobalSymbol = new TIntermSymbol(glFragDataGlobal);
694 auto &access = AccessIndex(*glFragDataGlobalSymbol, i);
695 TIntermBinary *assignment =
696 new TIntermBinary(TOperator::EOpAssign, glFragDataSlot, &access);
697 insertSequence->appendStatement(assignment);
698 }
699 return RunAtTheEndOfShader(&compiler, &root, insertSequence, &symbolTable);
700 }
701
702 [[nodiscard]] bool AppendVertexShaderTransformFeedbackOutputToMain(TCompiler &compiler,
703 SymbolEnv &mSymbolEnv,
704 TIntermBlock &root)
705 {
706 TSymbolTable &symbolTable = compiler.getSymbolTable();
707
708 // Append the assignment as a statement at the end of the shader.
709 return RunAtTheEndOfShader(&compiler, &root,
710 &(mSymbolEnv.callFunctionOverload(Name("@@XFB-OUT@@"), *new TType(),
711 *new TIntermSequence())),
712 &symbolTable);
713 }
714
715 // Unlike Vulkan, which has an automatic viewport-flipping extension, Metal requires us to flip
716 // gl_Position.y manually.
717 // This operation flips gl_Position.y using the expression:
718 // gl_Position.y = gl_Position.y * negViewportScaleY
719 [[nodiscard]] bool AppendVertexShaderPositionYCorrectionToMain(TCompiler *compiler,
720 TIntermBlock *root,
721 TSymbolTable *symbolTable,
722 TIntermTyped *negFlipY)
723 {
724 // Create a symbol reference to "gl_Position"
725 const TVariable *position = BuiltInVariable::gl_Position();
726 TIntermSymbol *positionRef = new TIntermSymbol(position);
727
728 // Create a swizzle to "gl_Position.y"
729 TVector<int> swizzleOffsetY;
730 swizzleOffsetY.push_back(1);
731 TIntermSwizzle *positionY = new TIntermSwizzle(positionRef, swizzleOffsetY);
732
733 // Create the expression "gl_Position.y * negFlipY"
734 TIntermBinary *inverseY = new TIntermBinary(EOpMul, positionY->deepCopy(), negFlipY);
735
736 // Create the assignment "gl_Position.y = gl_Position.y * negViewportScaleY"
737 TIntermTyped *positionYLHS = positionY->deepCopy();
738 TIntermBinary *assignment = new TIntermBinary(TOperator::EOpAssign, positionYLHS, inverseY);
739
740 // Append the assignment as a statement at the end of the shader.
741 return RunAtTheEndOfShader(compiler, root, assignment, symbolTable);
742 }
743
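// Emulates gl_ClipDistance using scalar user varyings: the vertex shader copies each
// gl_ClipDistance[i] element into a ClipDistance_i varying at the end of main, and the
// fragment shader copies the varyings back into a global array at the beginning of main.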
744 [[nodiscard]] bool EmulateClipDistanceVaryings(TCompiler *compiler,
745 TIntermBlock *root,
746 TSymbolTable *symbolTable,
747 const GLenum shaderType)
748 {
749 ASSERT(shaderType == GL_VERTEX_SHADER || shaderType == GL_FRAGMENT_SHADER);
750
751 const TVariable *clipDistanceVar =
752 &FindSymbolNode(root, ImmutableString("gl_ClipDistance"))->variable();
753
754 const bool fragment = shaderType == GL_FRAGMENT_SHADER;
755 if (fragment)
756 {
757 TType *globalType = new TType(EbtFloat, EbpHigh, EvqGlobal, 1, 1);
758 globalType->toArrayBaseType();
759 globalType->makeArray(compiler->getClipDistanceArraySize());
760
761 const TVariable *globalVar = new TVariable(symbolTable, ImmutableString("ClipDistance"),
762 globalType, SymbolType::AngleInternal);
763 if (!compiler->isClipDistanceRedeclared())
764 {
765 TIntermDeclaration *globalDecl = new TIntermDeclaration();
766 globalDecl->appendDeclarator(new TIntermSymbol(globalVar));
767 root->insertStatement(0, globalDecl);
768 }
769
770 if (!ReplaceVariable(compiler, root, clipDistanceVar, globalVar))
771 {
772 return false;
773 }
774 clipDistanceVar = globalVar;
775 }
776
777 TIntermBlock *assignBlock = new TIntermBlock();
778 size_t index = FindMainIndex(root);
779 TIntermSymbol *arraySym = new TIntermSymbol(clipDistanceVar);
780 TType *type = new TType(EbtFloat, EbpHigh, fragment ? EvqFragmentIn : EvqVertexOut, 1, 1);
781 for (uint8_t i = 0; i < compiler->getClipDistanceArraySize(); i++)
782 {
783 std::stringstream name;
784 name << "ClipDistance_" << static_cast<int>(i);
785 TIntermSymbol *varyingSym = new TIntermSymbol(new TVariable(
786 symbolTable, ImmutableString(name.str()), type, SymbolType::AngleInternal));
787
788 TIntermDeclaration *varyingDecl = new TIntermDeclaration();
789 varyingDecl->appendDeclarator(varyingSym);
790 root->insertStatement(index++, varyingDecl);
791
792 TIntermTyped *arrayAccess = new TIntermBinary(EOpIndexDirect, arraySym, CreateIndexNode(i));
793 assignBlock->appendStatement(new TIntermBinary(
794 EOpAssign, fragment ? arrayAccess : varyingSym, fragment ? varyingSym : arrayAccess));
795 }
796
797 return fragment ? RunAtTheBeginningOfShader(compiler, root, assignBlock)
798 : RunAtTheEndOfShader(compiler, root, assignBlock, symbolTable);
799 }
800 } // namespace
801
802 namespace mtl
803 {
804 TranslatorMetalReflection *getTranslatorMetalReflection(const TCompiler *compiler)
805 {
806 return ((TranslatorMSL *)compiler)->getTranslatorMetalReflection();
807 }
808 } // namespace mtl
809 TranslatorMSL::TranslatorMSL(sh::GLenum type, ShShaderSpec spec, ShShaderOutput output)
810 : TCompiler(type, spec, output)
811 {}
812
813 [[nodiscard]] bool TranslatorMSL::insertRasterizationDiscardLogic(TIntermBlock &root)
814 {
815 // This transformation leaves the tree in an inconsistent state by using a variable that's
816 // defined in text, outside of the knowledge of the AST.
817 mValidateASTOptions.validateVariableReferences = false;
818
819 TSymbolTable *symbolTable = &getSymbolTable();
820
821 TType *boolType = new TType(EbtBool);
822 boolType->setQualifier(EvqConst);
823 TVariable *discardEnabledVar =
824 new TVariable(symbolTable, sh::ImmutableString(sh::mtl::kRasterizerDiscardEnabledConstName),
825 boolType, SymbolType::AngleInternal);
826
827 const TVariable *position = BuiltInVariable::gl_Position();
828 TIntermSymbol *positionRef = new TIntermSymbol(position);
829
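// Emulate rasterizer discard by forcing gl_Position outside the clip volume so the
// primitive is clipped away and never rasterized.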
830 // Create vec4(-3, -3, -3, 1):
831 auto vec4Type = new TType(EbtFloat, 4);
832 TIntermSequence vec4Args = {
833 CreateFloatNode(-3.0f, EbpMedium),
834 CreateFloatNode(-3.0f, EbpMedium),
835 CreateFloatNode(-3.0f, EbpMedium),
836 CreateFloatNode(1.0f, EbpMedium),
837 };
838 TIntermAggregate *constVarConstructor =
839 TIntermAggregate::CreateConstructor(*vec4Type, &vec4Args);
840
841 // Create the assignment "gl_Position = vec4(-3, -3, -3, 1)"
842 TIntermBinary *assignment =
843 new TIntermBinary(TOperator::EOpAssign, positionRef->deepCopy(), constVarConstructor);
844
845 TIntermBlock *discardBlock = new TIntermBlock;
846 discardBlock->appendStatement(assignment);
847
848 TIntermSymbol *discardEnabled = new TIntermSymbol(discardEnabledVar);
849 TIntermIfElse *ifCall = new TIntermIfElse(discardEnabled, discardBlock, nullptr);
850
851 return RunAtTheEndOfShader(this, &root, ifCall, symbolTable);
852 }
853
854 // Metal needs to invert the depth if depthRange is in reverse order, i.e. depth near > depth far.
855 // This is achieved by multiplying the depth value by the scale value stored in the
856 // driver uniform's depthRange.reserved field.
857 bool TranslatorMSL::transformDepthBeforeCorrection(TIntermBlock *root,
858 const DriverUniformMetal *driverUniforms)
859 {
860 // Create a symbol reference to "gl_Position"
861 const TVariable *position = BuiltInVariable::gl_Position();
862 TIntermSymbol *positionRef = new TIntermSymbol(position);
863
864 // Create a swizzle to "gl_Position.z"
865 TVector<int> swizzleOffsetZ = {2};
866 TIntermSwizzle *positionZ = new TIntermSwizzle(positionRef, swizzleOffsetZ);
867
868 // Create a ref to "zscale"
869 TIntermTyped *viewportZScale = driverUniforms->getViewportZScale();
870
871 // Create the expression "gl_Position.z * zscale".
872 TIntermBinary *zScale = new TIntermBinary(EOpMul, positionZ->deepCopy(), viewportZScale);
873
874 // Create the assignment "gl_Position.z = gl_Position.z * zscale"
875 TIntermTyped *positionZLHS = positionZ->deepCopy();
876 TIntermBinary *assignment = new TIntermBinary(TOperator::EOpAssign, positionZLHS, zScale);
877
878 // Append the assignment as a statement at the end of the shader.
879 return RunAtTheEndOfShader(this, root, assignment, &getSymbolTable());
880 }
881
882 // This operation performs the viewport depth translation needed by Metal. GL uses a
883 // clip space z range of -1 to +1 whereas Metal uses 0 to 1. The translation becomes
884 // this expression
885 //
886 // z_metal = 0.5 * (w_gl + z_gl)
887 //
888 // where z_metal is the depth output of a Metal vertex shader and z_gl is the same for GL.
889 // This operation is skipped when GL_CLIP_DEPTH_MODE_EXT is set to GL_ZERO_TO_ONE_EXT.
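// Equivalently, GL clip space requires -w_gl <= z_gl <= w_gl while Metal expects
// 0 <= z_metal <= w_gl, so halving (z_gl + w_gl) rescales the range accordingly.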
890 bool TranslatorMSL::appendVertexShaderDepthCorrectionToMain(
891 TIntermBlock *root,
892 const DriverUniformMetal *driverUniforms)
893 {
894 const TVariable *position = BuiltInVariable::gl_Position();
895 TIntermSymbol *positionRef = new TIntermSymbol(position);
896
897 TVector<int> swizzleOffsetZ = {2};
898 TIntermSwizzle *positionZ = new TIntermSwizzle(positionRef, swizzleOffsetZ);
899
900 TIntermConstantUnion *oneHalf = CreateFloatNode(0.5f, EbpMedium);
901
902 TVector<int> swizzleOffsetW = {3};
903 TIntermSwizzle *positionW = new TIntermSwizzle(positionRef->deepCopy(), swizzleOffsetW);
904
905 // Create the expression "(gl_Position.z + gl_Position.w) * 0.5".
906 TIntermBinary *zPlusW = new TIntermBinary(EOpAdd, positionZ->deepCopy(), positionW->deepCopy());
907 TIntermBinary *halfZPlusW = new TIntermBinary(EOpMul, zPlusW, oneHalf->deepCopy());
908
909 // Create the assignment "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5"
910 TIntermTyped *positionZLHS = positionZ->deepCopy();
911 TIntermBinary *assignment = new TIntermBinary(TOperator::EOpAssign, positionZLHS, halfZPlusW);
912
913 // Apply depth correction if needed
914 TIntermBlock *block = new TIntermBlock;
915 block->appendStatement(assignment);
916 TIntermIfElse *ifCall = new TIntermIfElse(driverUniforms->getTransformDepth(), block, nullptr);
917
918 // Append the assignment as a statement at the end of the shader.
919 return RunAtTheEndOfShader(this, root, ifCall, &getSymbolTable());
920 }
921
922 static inline MetalShaderType metalShaderTypeFromGLSL(sh::GLenum shaderType)
923 {
924 switch (shaderType)
925 {
926 case GL_VERTEX_SHADER:
927 return MetalShaderType::Vertex;
928 case GL_FRAGMENT_SHADER:
929 return MetalShaderType::Fragment;
930 case GL_COMPUTE_SHADER:
931 ASSERT(0 && "compute shaders not currently supported");
932 return MetalShaderType::Compute;
933 default:
934 ASSERT(0 && "Invalid shader type.");
935 return MetalShaderType::None;
936 }
937 }
938
939 bool TranslatorMSL::translateImpl(TInfoSinkBase &sink,
940 TIntermBlock *root,
941 const ShCompileOptions &compileOptions,
942 PerformanceDiagnostics * /*perfDiagnostics*/,
943 SpecConst *specConst,
944 DriverUniformMetal *driverUniforms)
945 {
946 TSymbolTable &symbolTable = getSymbolTable();
947 IdGen idGen;
948 ProgramPreludeConfig ppc(metalShaderTypeFromGLSL(getShaderType()));
949
950 if (!WrapMain(*this, idGen, *root))
951 {
952 return false;
953 }
954
955 // Remove declarations of inactive shader interface variables so the glslang wrapper doesn't need to
956 // replace them. Note: this is done before extracting samplers from structs, as removing such
957 // inactive samplers is not yet supported. Note also that currently, CollectVariables marks
958 // every field of an active uniform that's of struct type as active, i.e. no extracted sampler
959 // is inactive.
960 if (!RemoveInactiveInterfaceVariables(this, root, &getSymbolTable(), getAttributes(),
961 getInputVaryings(), getOutputVariables(), getUniforms(),
962 getInterfaceBlocks(), false))
963 {
964 return false;
965 }
966
967 // Write out default uniforms into a uniform block assigned to a specific set/binding.
968 int aggregateTypesUsedForUniforms = 0;
969 int atomicCounterCount = 0;
970 for (const auto &uniform : getUniforms())
971 {
972 if (uniform.isStruct() || uniform.isArrayOfArrays())
973 {
974 ++aggregateTypesUsedForUniforms;
975 }
976
977 if (uniform.active && gl::IsAtomicCounterType(uniform.type))
978 {
979 ++atomicCounterCount;
980 }
981 }
982
983 // If there are any function calls that take array-of-array of opaque uniform parameters, or
984 // other opaque uniforms that need special handling in Vulkan, such as atomic counters,
985 // monomorphize the functions by removing said parameters and replacing them in the function
986 // body with the call arguments.
987 //
988 // This has a few benefits:
989 //
990 // - It dramatically simplifies future transformations w.r.t. samplers in structs, array of
991 // arrays of opaque types, atomic counters etc.
992 // - Avoids the need for shader*ArrayDynamicIndexing Vulkan features.
993 UnsupportedFunctionArgsBitSet args{UnsupportedFunctionArgs::StructContainingSamplers,
994 UnsupportedFunctionArgs::ArrayOfArrayOfSamplerOrImage,
995 UnsupportedFunctionArgs::AtomicCounter,
996 UnsupportedFunctionArgs::SamplerCubeEmulation,
997 UnsupportedFunctionArgs::Image};
998 if (!MonomorphizeUnsupportedFunctions(this, root, &getSymbolTable(), compileOptions, args))
999 {
1000 return false;
1001 }
1002
1003 if (aggregateTypesUsedForUniforms > 0)
1004 {
1005 if (!NameEmbeddedStructUniformsMetal(this, root, &symbolTable))
1006 {
1007 return false;
1008 }
1009
1010 if (!SeparateStructFromUniformDeclarations(this, root, &getSymbolTable()))
1011 {
1012 return false;
1013 }
1014
1015 int removedUniformsCount;
1016
1017 if (!RewriteStructSamplers(this, root, &getSymbolTable(), &removedUniformsCount))
1018 {
1019 return false;
1020 }
1021 }
1022
1023 // Replace array of array of opaque uniforms with a flattened array. This is run after
1024 // MonomorphizeUnsupportedFunctions and RewriteStructSamplers so that it's not possible for an
1025 // array of array of opaque type to be partially subscripted and passed to a function.
1026 if (!RewriteArrayOfArrayOfOpaqueUniforms(this, root, &getSymbolTable()))
1027 {
1028 return false;
1029 }
1030
1031 if (compileOptions.emulateSeamfulCubeMapSampling)
1032 {
1033 if (!RewriteCubeMapSamplersAs2DArray(this, root, &symbolTable,
1034 getShaderType() == GL_FRAGMENT_SHADER))
1035 {
1036 return false;
1037 }
1038 }
1039
1040 if (getShaderType() == GL_COMPUTE_SHADER)
1041 {
1042 driverUniforms->addComputeDriverUniformsToShader(root, &getSymbolTable());
1043 }
1044 else
1045 {
1046 driverUniforms->addGraphicsDriverUniformsToShader(root, &getSymbolTable());
1047 }
1048
1049 if (atomicCounterCount > 0)
1050 {
1051 const TIntermTyped *acbBufferOffsets = driverUniforms->getAcbBufferOffsets();
1052 if (!RewriteAtomicCounters(this, root, &symbolTable, acbBufferOffsets, nullptr))
1053 {
1054 return false;
1055 }
1056 }
1057 else if (getShaderVersion() >= 310)
1058 {
1059 // Vulkan doesn't support Atomic Storage as a Storage Class, but we've seen
1060 // cases where builtins are using it even with no active atomic counters.
1061 // This pass simply removes those builtins in that scenario.
1062 if (!RemoveAtomicCounterBuiltins(this, root))
1063 {
1064 return false;
1065 }
1066 }
1067
1068 if (getShaderType() != GL_COMPUTE_SHADER)
1069 {
1070 if (!ReplaceGLDepthRangeWithDriverUniform(this, root, driverUniforms, &getSymbolTable()))
1071 {
1072 return false;
1073 }
1074 }
1075
1076 {
1077 bool usesInstanceId = false;
1078 bool usesVertexId = false;
1079 for (const ShaderVariable &var : mAttributes)
1080 {
1081 if (var.isBuiltIn())
1082 {
1083 if (var.name == "gl_InstanceID")
1084 {
1085 usesInstanceId = true;
1086 }
1087 if (var.name == "gl_VertexID")
1088 {
1089 usesVertexId = true;
1090 }
1091 }
1092 }
1093
1094 if (usesInstanceId)
1095 {
1096 root->insertChildNodes(
1097 FindMainIndex(root),
1098 TIntermSequence{new TIntermDeclaration{BuiltInVariable::gl_InstanceID()}});
1099 }
1100 if (usesVertexId)
1101 {
1102 if (!ReplaceVariable(this, root, BuiltInVariable::gl_VertexID(), &kgl_VertexIDMetal))
1103 {
1104 return false;
1105 }
1106 DeclareRightBeforeMain(*root, kgl_VertexIDMetal);
1107 }
1108 }
1109 SymbolEnv symbolEnv(*this, *root);
1110
1111 bool usesSampleMask = false;
1112 if (getShaderType() == GL_FRAGMENT_SHADER)
1113 {
1114 bool usesPointCoord = false;
1115 bool usesFragCoord = false;
1116 bool usesFrontFacing = false;
1117 bool usesSampleID = false;
1118 bool usesSamplePosition = false;
1119 bool usesSampleMaskIn = false;
1120 for (const ShaderVariable &inputVarying : mInputVaryings)
1121 {
1122 if (inputVarying.isBuiltIn())
1123 {
1124 if (inputVarying.name == "gl_PointCoord")
1125 {
1126 usesPointCoord = true;
1127 }
1128 else if (inputVarying.name == "gl_FragCoord")
1129 {
1130 usesFragCoord = true;
1131 }
1132 else if (inputVarying.name == "gl_FrontFacing")
1133 {
1134 usesFrontFacing = true;
1135 }
1136 else if (inputVarying.name == "gl_SampleID")
1137 {
1138 usesSampleID = true;
1139 }
1140 else if (inputVarying.name == "gl_SamplePosition")
1141 {
1142 usesSampleID = true;
1143 usesSamplePosition = true;
1144 }
1145 else if (inputVarying.name == "gl_SampleMaskIn")
1146 {
1147 usesSampleMaskIn = true;
1148 }
1149 }
1150 }
1151
1152 bool usesFragColor = false;
1153 bool usesFragData = false;
1154 bool usesFragDepth = false;
1155 bool usesFragDepthEXT = false;
1156 bool usesSecondaryFragColorEXT = false;
1157 bool usesSecondaryFragDataEXT = false;
1158 for (const ShaderVariable &outputVarying : mOutputVariables)
1159 {
1160 if (outputVarying.isBuiltIn())
1161 {
1162 if (outputVarying.name == "gl_FragColor")
1163 {
1164 usesFragColor = true;
1165 }
1166 else if (outputVarying.name == "gl_FragData")
1167 {
1168 usesFragData = true;
1169 }
1170 else if (outputVarying.name == "gl_FragDepth")
1171 {
1172 usesFragDepth = true;
1173 }
1174 else if (outputVarying.name == "gl_FragDepthEXT")
1175 {
1176 usesFragDepthEXT = true;
1177 }
1178 else if (outputVarying.name == "gl_SecondaryFragColorEXT")
1179 {
1180 usesSecondaryFragColorEXT = true;
1181 }
1182 else if (outputVarying.name == "gl_SecondaryFragDataEXT")
1183 {
1184 usesSecondaryFragDataEXT = true;
1185 }
1186 else if (outputVarying.name == "gl_SampleMask")
1187 {
1188 usesSampleMask = true;
1189 }
1190 }
1191 }
1192
1193 // A shader may assign values to either the set of gl_FragColor and gl_SecondaryFragColorEXT
1194 // or the set of gl_FragData and gl_SecondaryFragDataEXT, but not both.
1195 ASSERT((!usesFragColor && !usesSecondaryFragColorEXT) ||
1196 (!usesFragData && !usesSecondaryFragDataEXT));
1197
1198 if (usesFragColor)
1199 {
1200 AddFragColorDeclaration(*root, symbolTable, *BuiltInVariable::gl_FragColor());
1201 }
1202 else if (usesFragData)
1203 {
1204 if (!AddFragDataDeclaration(*this, *root, usesSecondaryFragDataEXT, false))
1205 {
1206 return false;
1207 }
1208 }
1209
1210 if (usesFragDepth)
1211 {
1212 AddFragDepthDeclaration(*root, symbolTable);
1213 }
1214 else if (usesFragDepthEXT)
1215 {
1216 AddFragDepthEXTDeclaration(*this, *root, symbolTable);
1217 }
1218
1219 if (usesSecondaryFragColorEXT)
1220 {
1221 AddFragColorDeclaration(*root, symbolTable,
1222 *BuiltInVariable::gl_SecondaryFragColorEXT());
1223 }
1224 else if (usesSecondaryFragDataEXT)
1225 {
1226 if (!AddFragDataDeclaration(*this, *root, usesSecondaryFragDataEXT, true))
1227 {
1228 return false;
1229 }
1230 }
1231
1232 bool usesSampleInterpolation = false;
1233 bool usesSampleInterpolant = false;
1234 if ((getShaderVersion() >= 320 ||
1235 IsExtensionEnabled(getExtensionBehavior(),
1236 TExtension::OES_shader_multisample_interpolation)) &&
1237 !RewriteInterpolants(*this, *root, symbolTable, driverUniforms,
1238 &usesSampleInterpolation, &usesSampleInterpolant))
1239 {
1240 return false;
1241 }
1242
1243 if (usesSampleID || (usesSampleMaskIn && usesSampleInterpolation) || usesSampleInterpolant)
1244 {
1245 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_SampleID());
1246 }
1247
1248 if (usesSamplePosition)
1249 {
1250 if (!AddSamplePositionDeclaration(*this, *root, symbolTable, driverUniforms))
1251 {
1252 return false;
1253 }
1254 }
1255
1256 if (usesSampleMaskIn)
1257 {
1258 if (!AddSampleMaskInDeclaration(*this, *root, symbolTable, driverUniforms,
1259 usesSampleID || usesSampleInterpolation))
1260 {
1261 return false;
1262 }
1263 }
1264
1265 ASSERT(!usesSampleMask || isSampleMaskAllowed());
1266
1267 if (usesPointCoord)
1268 {
1269 TIntermTyped *flipNegXY =
1270 driverUniforms->getNegFlipXY(&getSymbolTable(), DriverUniformFlip::Fragment);
1271 TIntermConstantUnion *pivot = CreateFloatNode(0.5f, EbpMedium);
1272 if (!FlipBuiltinVariable(this, root, GetMainSequence(root), flipNegXY,
1273 &getSymbolTable(), BuiltInVariable::gl_PointCoord(),
1274 kFlippedPointCoordName, pivot))
1275 {
1276 return false;
1277 }
1278 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_PointCoord());
1279 }
1280
1281 if (usesFragCoord || compileOptions.emulateAlphaToCoverage ||
1282 compileOptions.metal.generateShareableShaders)
1283 {
1284 if (!InsertFragCoordCorrection(this, compileOptions, root, GetMainSequence(root),
1285 &getSymbolTable(), driverUniforms))
1286 {
1287 return false;
1288 }
1289 const TVariable *fragCoord = static_cast<const TVariable *>(
1290 getSymbolTable().findBuiltIn(ImmutableString("gl_FragCoord"), getShaderVersion()));
1291 DeclareRightBeforeMain(*root, *fragCoord);
1292 }
1293
1294 if (!RewriteDfdy(this, root, &getSymbolTable(), getShaderVersion(), specConst,
1295 driverUniforms))
1296 {
1297 return false;
1298 }
1299
1300 if (getClipDistanceArraySize())
1301 {
1302 if (!EmulateClipDistanceVaryings(this, root, &getSymbolTable(), getShaderType()))
1303 {
1304 return false;
1305 }
1306 }
1307
1308 if (usesFrontFacing)
1309 {
1310 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_FrontFacing());
1311 }
1312
1313 bool usesNumSamples = false;
1314 for (const ShaderVariable &uniform : mUniforms)
1315 {
1316 if (uniform.name == "gl_NumSamples")
1317 {
1318 usesNumSamples = true;
1319 break;
1320 }
1321 }
1322
1323 if (usesNumSamples)
1324 {
1325 if (!AddNumSamplesDeclaration(*this, *root, symbolTable))
1326 {
1327 return false;
1328 }
1329 }
1330 }
1331 else if (getShaderType() == GL_VERTEX_SHADER)
1332 {
1333 DeclareRightBeforeMain(*root, *BuiltInVariable::gl_Position());
1334
1335 if (FindSymbolNode(root, BuiltInVariable::gl_PointSize()->name()))
1336 {
1337 const TVariable *pointSize = static_cast<const TVariable *>(
1338 getSymbolTable().findBuiltIn(ImmutableString("gl_PointSize"), getShaderVersion()));
1339 DeclareRightBeforeMain(*root, *pointSize);
1340 }
1341
1342 if (FindSymbolNode(root, BuiltInVariable::gl_VertexIndex()->name()))
1343 {
1344 if (!ReplaceVariable(this, root, BuiltInVariable::gl_VertexIndex(), &kgl_VertexIDMetal))
1345 {
1346 return false;
1347 }
1348 DeclareRightBeforeMain(*root, kgl_VertexIDMetal);
1349 }
1350
1351 // Append a macro for transform feedback substitution prior to modifying depth.
1352 if (!AppendVertexShaderTransformFeedbackOutputToMain(*this, symbolEnv, *root))
1353 {
1354 return false;
1355 }
1356
1357 if (getClipDistanceArraySize())
1358 {
1359 if (!ZeroDisabledClipDistanceAssignments(this, root, &getSymbolTable(), getShaderType(),
1360 driverUniforms->getClipDistancesEnabled()))
1361 {
1362 return false;
1363 }
1364
1365 if (IsExtensionEnabled(getExtensionBehavior(), TExtension::ANGLE_clip_cull_distance) &&
1366 !EmulateClipDistanceVaryings(this, root, &getSymbolTable(), getShaderType()))
1367 {
1368 return false;
1369 }
1370 }
1371
1372 if (!transformDepthBeforeCorrection(root, driverUniforms))
1373 {
1374 return false;
1375 }
1376
1377 if (!appendVertexShaderDepthCorrectionToMain(root, driverUniforms))
1378 {
1379 return false;
1380 }
1381 }
1382
1383 if (getShaderType() == GL_VERTEX_SHADER)
1384 {
1385 TIntermTyped *flipNegY =
1386 driverUniforms->getFlipXY(&getSymbolTable(), DriverUniformFlip::PreFragment);
1387 flipNegY = (new TIntermSwizzle(flipNegY, {1}))->fold(nullptr);
1388
1389 if (!AppendVertexShaderPositionYCorrectionToMain(this, root, &getSymbolTable(), flipNegY))
1390 {
1391 return false;
1392 }
1393 if (!insertRasterizationDiscardLogic(*root))
1394 {
1395 return false;
1396 }
1397 }
1398 else if (getShaderType() == GL_FRAGMENT_SHADER)
1399 {
1400 if (isSampleMaskAllowed())
1401 {
1402 mValidateASTOptions.validateVariableReferences = false;
1403 if (!AddSampleMaskDeclaration(*this, *root, symbolTable, driverUniforms,
1404 compileOptions.emulateAlphaToCoverage ||
1405 compileOptions.metal.generateShareableShaders,
1406 usesSampleMask))
1407 {
1408 return false;
1409 }
1410 }
1411 }
1412
1413 if (!validateAST(root))
1414 {
1415 return false;
1416 }
1417
1418 // This is the largest size required to pass all the tests in
1419 // (dEQP-GLES3.functional.shaders.large_constant_arrays)
1420 // This value could in principle be smaller.
1421 const size_t hoistThresholdSize = 256;
1422 if (!HoistConstants(*this, *root, idGen, hoistThresholdSize))
1423 {
1424 return false;
1425 }
1426
1427 if (!ConvertUnsupportedConstructorsToFunctionCalls(*this, *root))
1428 {
1429 return false;
1430 }
1431
1432 const bool needsExplicitBoolCasts = compileOptions.addExplicitBoolCasts;
1433 if (!AddExplicitTypeCasts(*this, *root, symbolEnv, needsExplicitBoolCasts))
1434 {
1435 return false;
1436 }
1437
1438 if (!SeparateCompoundStructDeclarations(*this, idGen, *root, &getSymbolTable()))
1439 {
1440 return false;
1441 }
1442
1443 if (!SeparateCompoundExpressions(*this, symbolEnv, idGen, *root))
1444 {
1445 return false;
1446 }
1447
1448 if (!ReduceInterfaceBlocks(*this, *root, idGen, &getSymbolTable()))
1449 {
1450 return false;
1451 }
1452
1453 // The RewritePipelines phase leaves the tree in an inconsistent state by inserting
1454 // references to structures like "ANGLE_TextureEnv<metal::texture2d<float>>" which are
1455 // defined in text (in ProgramPrelude), outside of the knowledge of the AST.
1456 mValidateASTOptions.validateStructUsage = false;
1457 // The RewritePipelines phase also generates incoming arguments to synthesized
1458 // functions that are missing qualifiers - for example, angleUniforms isn't marked
1459 // as an incoming argument.
1460 mValidateASTOptions.validateQualifiers = false;
1461
1462 PipelineStructs pipelineStructs;
1463 if (!RewritePipelines(*this, *root, getInputVaryings(), getOutputVaryings(), idGen,
1464 *driverUniforms, symbolEnv, pipelineStructs))
1465 {
1466 return false;
1467 }
1468 if (getShaderType() == GL_VERTEX_SHADER)
1469 {
1470 // This has to happen after RewritePipelines.
1471 if (!IntroduceVertexAndInstanceIndex(*this, *root))
1472 {
1473 return false;
1474 }
1475 }
1476
1477 if (!RewriteCaseDeclarations(*this, *root))
1478 {
1479 return false;
1480 }
1481
1482 if (!RewriteUnaddressableReferences(*this, *root, symbolEnv))
1483 {
1484 return false;
1485 }
1486
1487 if (!RewriteOutArgs(*this, *root, symbolEnv))
1488 {
1489 return false;
1490 }
1491 if (!FixTypeConstructors(*this, symbolEnv, *root))
1492 {
1493 return false;
1494 }
1495 if (!ToposortStructs(*this, symbolEnv, *root, ppc))
1496 {
1497 return false;
1498 }
1499 if (!EmitMetal(*this, *root, idGen, pipelineStructs, symbolEnv, ppc, compileOptions))
1500 {
1501 return false;
1502 }
1503
1504 ASSERT(validateAST(root));
1505
1506 return true;
1507 }
1508
1509 bool TranslatorMSL::translate(TIntermBlock *root,
1510 const ShCompileOptions &compileOptions,
1511 PerformanceDiagnostics *perfDiagnostics)
1512 {
1513 if (!root)
1514 {
1515 return false;
1516 }
1517
1518 // TODO: refactor the code in TranslatorMSL to not issue raw function calls.
1519 // http://anglebug.com/6059#c2
1520 mValidateASTOptions.validateNoRawFunctionCalls = false;
1521 // A validation error is generated in this backend due to bool uniforms.
1522 mValidateASTOptions.validatePrecision = false;
1523
1524 TInfoSinkBase &sink = getInfoSink().obj;
1525 SpecConst specConst(&getSymbolTable(), compileOptions, getShaderType());
1526 DriverUniformMetal driverUniforms(DriverUniformMode::Structure);
1527 if (!translateImpl(sink, root, compileOptions, perfDiagnostics, &specConst, &driverUniforms))
1528 {
1529 return false;
1530 }
1531
1532 return true;
1533 }
1534 bool TranslatorMSL::shouldFlattenPragmaStdglInvariantAll()
1535 {
1536 // Not necessary for MSL transformation.
1537 return false;
1538 }
1539
1540 } // namespace sh
1541