• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  **
3  ** Copyright 2011, The Android Open Source Project
4  **
5  ** Licensed under the Apache License, Version 2.0 (the "License");
6  ** you may not use this file except in compliance with the License.
7  ** You may obtain a copy of the License at
8  **
9  **     http://www.apache.org/licenses/LICENSE-2.0
10  **
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  */
17 
18 #include "src/pixelflinger2/pixelflinger2.h"
19 #include "src/pixelflinger2/llvm_helper.h"
20 #include "src/mesa/main/mtypes.h"
21 
22 #include <llvm/Module.h>
23 
24 //#undef ALOGD
25 //#define ALOGD(...)
26 
27 using namespace llvm;
28 
StencilOp(IRBuilder<> & builder,const unsigned char op,Value * sPtr,Value * sRef)29 static void StencilOp(IRBuilder<> &builder, const unsigned char op,
30                       Value * sPtr, Value * sRef)
31 {
32    CondBranch condBranch(builder);
33    Value * s = builder.CreateLoad(sPtr, "stenciOpS");
34    switch (op) {
35    case 0 : // GL_ZERO
36       builder.CreateStore(builder.getInt8(0), sPtr);
37       break;
38    case 1 : // GL_KEEP
39       builder.CreateStore(s, sPtr);
40       break;
41    case 2 : // GL_REPLACE
42       builder.CreateStore(sRef, sPtr);
43       break;
44    case 3 : // GL_INCR
45       condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(255)));
46       builder.CreateStore(s, sPtr);
47       condBranch.elseop();
48       builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
49       condBranch.endif();
50       break;
51    case 4 : // GL_DECR
52       condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(0)));
53       builder.CreateStore(s, sPtr);
54       condBranch.elseop();
55       builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
56       condBranch.endif();
57       break;
58    case 5 : // GL_INVERT
59       builder.CreateStore(builder.CreateNot(s), sPtr);
60       break;
61    case 6 : // GL_INCR_WRAP
62       builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
63       break;
64    case 7 : // GL_DECR_WRAP
65       builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
66       break;
67    default:
68       assert(0);
69       break;
70    }
71 }
72 
StencilOp(IRBuilder<> & builder,Value * face,const unsigned char frontOp,const unsigned char backOp,Value * sPtr,Value * sRef)73 static Value * StencilOp(IRBuilder<> & builder, Value * face,
74                          const unsigned char frontOp, const unsigned char backOp,
75                          Value * sPtr, Value * sRef)
76 {
77    CondBranch condBranch(builder);
78    if (frontOp != backOp)
79       condBranch.ifCond(builder.CreateICmpEQ(face, builder.getInt8(0)));
80 
81    StencilOp(builder, frontOp, sPtr, sRef);
82 
83    if (frontOp != backOp) {
84       condBranch.elseop();
85       StencilOp(builder, backOp, sPtr, sRef);
86       condBranch.endif();
87    }
88    return builder.CreateLoad(sPtr);
89 }
90 
StencilFunc(IRBuilder<> & builder,const unsigned char func,Value * s,Value * sRef,Value * sCmpPtr)91 static void StencilFunc(IRBuilder<> & builder, const unsigned char func,
92                         Value * s, Value * sRef, Value * sCmpPtr)
93 {
94    switch (func) {
95    case GL_NEVER & 0x7:
96       builder.CreateStore(builder.getFalse(), sCmpPtr);
97       break;
98    case GL_LESS & 0x7:
99       builder.CreateStore(builder.CreateICmpULT(sRef, s), sCmpPtr);
100       break;
101    case GL_EQUAL & 0x7:
102       builder.CreateStore(builder.CreateICmpEQ(sRef, s), sCmpPtr);
103       break;
104    case GL_LEQUAL & 0x7:
105       builder.CreateStore(builder.CreateICmpULE(sRef, s), sCmpPtr);
106       break;
107    case GL_GREATER & 0x7:
108       builder.CreateStore(builder.CreateICmpUGT(sRef, s), sCmpPtr);
109       break;
110    case GL_NOTEQUAL & 0x7:
111       builder.CreateStore(builder.CreateICmpNE(sRef, s), sCmpPtr);
112       break;
113    case GL_GEQUAL & 0x7:
114       builder.CreateStore(builder.CreateICmpUGE(sRef, s), sCmpPtr);
115       break;
116    case GL_ALWAYS & 0x7:
117       builder.CreateStore(builder.getTrue(), sCmpPtr);
118       break;
119    default:
120       assert(0);
121       break;
122    }
123 }
124 
BlendFactor(const unsigned mode,Value * src,Value * dst,Value * constant,Value * one,Value * zero,Value * srcA,Value * dstA,Value * constantA,Value * sOne,const bool isVector,IRBuilder<> & builder)125 static Value * BlendFactor(const unsigned mode, Value * src, Value * dst,
126                            Value * constant, Value * one, Value * zero,
127                            Value * srcA, Value * dstA, Value * constantA,
128                            Value * sOne, const bool isVector, IRBuilder<> & builder)
129 {
130    Value * factor = NULL;
131    switch (mode) {
132    case GGLBlendState::GGL_ZERO:
133       factor = zero;
134       break;
135    case GGLBlendState::GGL_ONE:
136       factor = one;
137       break;
138    case GGLBlendState::GGL_SRC_COLOR:
139       factor = src;
140       break;
141    case GGLBlendState::GGL_ONE_MINUS_SRC_COLOR:
142       factor = builder.CreateSub(one, src);
143       break;
144    case GGLBlendState::GGL_DST_COLOR:
145       factor = dst;
146       break;
147    case GGLBlendState::GGL_ONE_MINUS_DST_COLOR:
148       factor = builder.CreateSub(one, dst);
149       break;
150    case GGLBlendState::GGL_SRC_ALPHA:
151       factor = srcA;
152       if (isVector)
153          factor = intVec(builder, factor, factor, factor, factor);
154       break;
155    case GGLBlendState::GGL_ONE_MINUS_SRC_ALPHA:
156       factor = builder.CreateSub(sOne, srcA);
157       if (isVector)
158          factor = intVec(builder, factor, factor, factor, factor);
159       break;
160    case GGLBlendState::GGL_DST_ALPHA:
161       factor = dstA;
162       if (isVector)
163          factor = intVec(builder, factor, factor, factor, factor);
164       break;
165    case GGLBlendState::GGL_ONE_MINUS_DST_ALPHA:
166       factor = builder.CreateSub(sOne, dstA);
167       if (isVector)
168          factor = intVec(builder, factor, factor, factor, factor);
169       break;
170    case GGLBlendState::GGL_SRC_ALPHA_SATURATE:
171       // valid only for source color and alpha
172       factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA));
173       if (isVector)
174          factor = intVec(builder, factor, factor, factor, sOne);
175       else
176          factor = sOne; // when it's used for source alpha, it's just 1
177       break;
178    case GGLBlendState::GGL_CONSTANT_COLOR:
179       factor = constant;
180       break;
181    case GGLBlendState::GGL_ONE_MINUS_CONSTANT_COLOR:
182       factor = builder.CreateSub(one, constant);
183       break;
184    case GGLBlendState::GGL_CONSTANT_ALPHA:
185       factor = constantA;
186       if (isVector)
187          factor = intVec(builder, factor, factor, factor, factor);
188       break;
189    case GGLBlendState::GGL_ONE_MINUS_CONSTANT_ALPHA:
190       factor = builder.CreateSub(sOne, constantA);
191       if (isVector)
192          factor = intVec(builder, factor, factor, factor, factor);
193       break;
194    default:
195       assert(0);
196       break;
197    }
198    return factor;
199 }
200 
Saturate(IRBuilder<> & builder,Value * intVector)201 static Value * Saturate(IRBuilder<> & builder, Value * intVector)
202 {
203    intVector = intVecMax(builder, intVector, constIntVec(builder, 0,0,0,0));
204    return intVecMin(builder, intVector, constIntVec(builder, 255,255,255,255));
205 }
206 
207 // src is int32x4 [0,255] rgba vector, and combines them into int32
208 // RGB_565 channel order is weird
IntVectorToScreenColor(IRBuilder<> & builder,const GGLPixelFormat format,Value * src)209 static Value * IntVectorToScreenColor(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
210 {
211    if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
212       src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24));
213       std::vector<Value *> comps = extractVector(builder, src);
214       comps[0] = builder.CreateOr(comps[0], comps[1]);
215       comps[0] = builder.CreateOr(comps[0], comps[2]);
216       comps[0] = builder.CreateOr(comps[0], comps[3]);
217       return comps[0];
218    } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
219       src = builder.CreateAnd(src, constIntVec(builder, 0xf8, 0xfc, 0xf8, 0));
220       std::vector<Value *> comps = extractVector(builder, src);
221       // channel order is weird
222       for (unsigned i = 0; i < 4; i++)
223          comps[i] = builder.CreateTrunc(comps[i], builder.getInt16Ty());
224       comps[2] = builder.CreateLShr(comps[2], 3);
225       comps[1] = builder.CreateShl(comps[1], 3);
226       comps[0] = builder.CreateShl(comps[0], 8);
227 
228       comps[0] = builder.CreateOr(comps[0], comps[1]);
229       comps[0] = builder.CreateOr(comps[0], comps[2]);
230       return comps[0];
231    } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
232       return builder.getInt32(0);
233    else
234       assert(0);
235    return NULL;
236 }
237 
238 // src is int32 or int16, return is int32x4 [0,255] rgba
239 // RGB_565 channel order is weird
ScreenColorToIntVector(IRBuilder<> & builder,const GGLPixelFormat format,Value * src)240 static Value * ScreenColorToIntVector(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
241 {
242    src = builder.CreateZExt(src, builder.getInt32Ty());
243    Value * dst = intVec(builder, src, src, src, src);
244    if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
245       dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24));
246       dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
247    } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
248       // channel order is weird
249       dst = builder.CreateAnd(dst, constIntVec(builder, 0xf800, 0x7e0, 0x1f, 0));
250       dst = builder.CreateLShr(dst, constIntVec(builder, 8, 3, 0, 0));
251       dst = builder.CreateShl(dst, constIntVec(builder, 0, 0, 3, 0));
252       dst = builder.CreateOr(dst, constIntVec(builder, 0, 0, 0, 0xff));
253    } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
254       ALOGD("pf2: ScreenColorToIntVector GGL_PIXEL_FORMAT_UNKNOWN"); // not set yet, do nothing
255    else
256       assert(0);
257    return dst;
258 }
259 
260 // src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer; return is i32
GenerateFSBlend(const GGLState * gglCtx,const GGLPixelFormat format,IRBuilder<> & builder,Value * src,Value * dst)261 Value * GenerateFSBlend(const GGLState * gglCtx, const GGLPixelFormat format, /*const RegDesc * regDesc,*/
262                         IRBuilder<> & builder, Value * src, Value * dst)
263 {
264    Type * const intType = builder.getInt32Ty();
265 
266    // TODO cast the outputs pointer type to int for writing to minimize bandwidth
267    if (!gglCtx->blendState.enable) {
268 //        if (regDesc->IsInt32Color())
269 //        {
270 //            debug_printf("GenerateFixedFS dst is already scalar fixed0 \n");
271 //            src = builder.CreateExtractElement(src, builder.getInt32(0));
272 //            src = builder.CreateBitCast(src, intType); // it's already RGBA int32
273 //        }
274 //        else if (regDesc->IsVectorType(Float))
275 //        {
276       src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
277       src = builder.CreateFPToSI(src, intVecType(builder));
278       src = Saturate(builder, src);
279       src = IntVectorToScreenColor(builder, format, src);
280 //        }
281 //        else if (regDesc->IsVectorType(Fixed8))
282 //        {
283 //            src = builder.CreateBitCast(src, instr->GetIntVectorType());
284 //            src = Saturate(instr, src);
285 //            src = IntVectorToColor(instr, storage, src);
286 //        }
287 //        else if (regDesc->IsVectorType(Fixed16))
288 //        {
289 //            src = builder.CreateBitCast(src, instr->GetIntVectorType());
290 //            src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
291 //            src = Saturate(instr, src);
292 //            src = IntVectorToColor(instr, storage, src);
293 //        }
294 //        else
295 //            assert(0);
296       return src;
297    }
298    // blending, so convert src to <4 x i32>
299 //    if (regDesc->IsInt32Color())
300 //    {
301 //        src = builder.CreateExtractElement(src, builder.getInt32(0));
302 //        src = builder.CreateBitCast(src, intType); // it's already RGBA int32
303 //
304 //        Value * channels = Constant::getNullValue(instr->GetIntVectorType());
305 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(0));
306 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(1));
307 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(2));
308 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(3));
309 //        channels = builder.CreateLShr(channels, constIntVec(builder,0, 8, 16, 24));
310 //        channels = builder.CreateAnd(channels, constIntVec(builder,0xff, 0xff, 0xff, 0xff));
311 //        src = channels;
312 //    }
313 //    else if (regDesc->IsVectorType(Fixed8)) // it's already int32x4 RGBA
314 //        src = builder.CreateBitCast(src, instr->GetIntVectorType());
315 //    else if (regDesc->IsVectorType(Fixed16))
316 //    {
317 //        src = builder.CreateBitCast(src, instr->GetIntVectorType());
318 //        // TODO DXL consider shl dst by 8 and ashr by 16 in the end for more precision
319 //        src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
320 //    }
321 //    else if (regDesc->IsVectorType(Float))
322 //    {
323    src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
324    src = builder.CreateFPToSI(src, intVecType(builder));
325 //    }
326 //    else
327 //        assert(0);
328 
329    Value * const one = constIntVec(builder,255,255,255,255);
330    Value * const zero = constIntVec(builder,0,0,0,0);
331    Value * const sOne = builder.getInt32(255);
332    Value * const sZero = builder.getInt32(0);
333 
334 #if USE_LLVM_SCANLINE
335    Value * constant = constIntVec(builder,gglCtx->blendState.color[0],
336                                   gglCtx->blendState.color[1],
337                                   gglCtx->blendState.color[2],
338                                   gglCtx->blendState.color[3]);
339 #else
340    Value * constant = NULL;
341    assert(0);
342 #endif
343 
344    Value * srcA = extractVector(builder,src)[3];
345    Value * dstA = extractVector(builder,dst)[3];
346    Value * constantA = extractVector(builder,constant)[3];
347 
348    Value * sf = BlendFactor(gglCtx->blendState.scf, src, dst,
349                             constant, one, zero, srcA, dstA,
350                             constantA, sOne, true, builder);
351    if (gglCtx->blendState.scf != gglCtx->blendState.saf) {
352       Value * sfA = BlendFactor(gglCtx->blendState.saf, srcA, dstA,
353                                 constantA, sOne, sZero, srcA, dstA,
354                                 constantA, sOne, false, builder);
355       sf = builder.CreateInsertElement(sf, sfA, builder.getInt32(3),
356                                        name("sfAStore"));
357    }
358 
359    Value * df = BlendFactor(gglCtx->blendState.dcf, src, dst,
360                             constant, one, zero, srcA, dstA,
361                             constantA, sOne, true, builder);
362    if (gglCtx->blendState.dcf != gglCtx->blendState.daf) {
363       Value * dfA = BlendFactor(gglCtx->blendState.daf, srcA, dstA,
364                                 constantA, sOne, sZero, srcA, dstA,
365                                 constantA, sOne, false, builder);
366       df = builder.CreateInsertElement(df, dfA, builder.getInt32(3),
367                                        name("dfAStore"));
368    }
369 
370    // this is factor *= 256 / 255; factors have a chance of constant folding
371    sf = builder.CreateAdd(sf, builder.CreateLShr(sf, constIntVec(builder,7,7,7,7)));
372    df = builder.CreateAdd(df, builder.CreateLShr(df, constIntVec(builder,7,7,7,7)));
373 
374    src = builder.CreateMul(src, sf);
375    dst = builder.CreateMul(dst, df);
376 
377    Value * res = NULL;
378    switch (gglCtx->blendState.ce + GL_FUNC_ADD) {
379    case GL_FUNC_ADD:
380       res = builder.CreateAdd(src, dst);
381       break;
382    case GL_FUNC_SUBTRACT:
383       res = builder.CreateSub(src, dst);
384       break;
385    case GL_FUNC_REVERSE_SUBTRACT:
386       res = builder.CreateSub(dst, src);
387       break;
388    default:
389       assert(0);
390       break;
391    }
392    if (gglCtx->blendState.ce != gglCtx->blendState.ae) {
393       srcA = extractVector(builder,src)[3];
394       dstA = extractVector(builder,dst)[3];
395       Value * resA = NULL;
396       switch (gglCtx->blendState.ae + GL_FUNC_ADD) {
397       case GL_FUNC_ADD:
398          resA = builder.CreateAdd(srcA, dstA);
399          break;
400       case GL_FUNC_SUBTRACT:
401          resA = builder.CreateSub(srcA, dstA);
402          break;
403       case GL_FUNC_REVERSE_SUBTRACT:
404          resA = builder.CreateSub(dstA, srcA);
405          break;
406       default:
407          assert(0);
408          break;
409       }
410       res = builder.CreateInsertElement(res, resA, builder.getInt32(3),
411                                         name("resAStore"));
412    }
413 
414    res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8));
415    res = Saturate(builder, res);
416    res = IntVectorToScreenColor(builder, format, res);
417    return res;
418 }
419 
ScanLineFunctionType(IRBuilder<> & builder)420 static FunctionType * ScanLineFunctionType(IRBuilder<> & builder)
421 {
422    std::vector<Type*> funcArgs;
423    VectorType * vectorType = floatVecType(builder);
424    PointerType * vectorPtr = PointerType::get(vectorType, 0);
425    Type * intType = builder.getInt32Ty();
426    PointerType * intPointerType = PointerType::get(intType, 0);
427    PointerType * bytePointerType = PointerType::get(builder.getInt8Ty(), 0);
428 
429    funcArgs.push_back(vectorPtr); // start
430    funcArgs.push_back(vectorPtr); // step
431    funcArgs.push_back(vectorPtr); // constants
432    funcArgs.push_back(intPointerType); // frame
433    funcArgs.push_back(intPointerType); // depth
434    funcArgs.push_back(bytePointerType); // stencil
435    funcArgs.push_back(bytePointerType); // stencil state
436    funcArgs.push_back(intType); // count
437 
438    FunctionType *functionType = FunctionType::get(/*Result=*/builder.getVoidTy(),
439                                                   llvm::ArrayRef<Type*>(funcArgs),
440                                                   /*isVarArg=*/false);
441 
442    return functionType;
443 }
444 
445 // generated scanline function parameters are VertexOutput * start, VertexOutput * step,
446 // unsigned * frame, int * depth, unsigned char * stencil,
447 // GGLActiveStencilState * stencilState, unsigned count
GenerateScanLine(const GGLState * gglCtx,const gl_shader_program * program,Module * mod,const char * shaderName,const char * scanlineName)448 void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program, Module * mod,
449                       const char * shaderName, const char * scanlineName)
450 {
451    IRBuilder<> builder(mod->getContext());
452 //   debug_printf("GenerateScanLine %s \n", scanlineName);
453 
454    Type * intType = builder.getInt32Ty();
455    PointerType * intPointerType = PointerType::get(intType, 0);
456    Type * byteType = builder.getInt8Ty();
457    PointerType * bytePointerType = PointerType::get(byteType, 0);
458 
459    Function * func = mod->getFunction(scanlineName);
460    if (func)
461       return;
462 
463    func = llvm::cast<Function>(mod->getOrInsertFunction(scanlineName,
464                                ScanLineFunctionType(builder)));
465 
466    BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0);
467    builder.SetInsertPoint(label_entry);
468    CondBranch condBranch(builder);
469 
470    Function::arg_iterator args = func->arg_begin();
471    Value * start = args++;
472    start->setName("start");
473    Value * step = args++;
474    step->setName("step");
475    Value * constants = args++;
476    constants->setName("constants");
477 
478    // need alloc to be able to assign to it by using store
479    Value * framePtr = builder.CreateAlloca(intPointerType);
480    builder.CreateStore(args++, framePtr);
481    Value * depthPtr = builder.CreateAlloca(intPointerType);
482    builder.CreateStore(args++, depthPtr);
483    Value * stencilPtr = builder.CreateAlloca(bytePointerType);
484    builder.CreateStore(args++, stencilPtr);
485    Value * stencilState = args++;
486    stencilState->setName("stencilState");
487    Value * countPtr = builder.CreateAlloca(intType);
488    builder.CreateStore(args++, countPtr);
489 
490    Value * sFace = NULL, * sRef = NULL, *sMask = NULL, * sFunc = NULL;
491    if (gglCtx->bufferState.stencilTest) {
492       sFace = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 0), "sFace");
493       if (gglCtx->frontStencil.ref == gglCtx->backStencil.ref)
494          sRef = builder.getInt8(gglCtx->frontStencil.ref);
495       else
496          sRef = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 1), "sRef");
497       if (gglCtx->frontStencil.mask == gglCtx->backStencil.mask)
498          sMask = builder.getInt8(gglCtx->frontStencil.mask);
499       else
500          sMask = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 2), "sMask");
501       if (gglCtx->frontStencil.func == gglCtx->backStencil.func)
502          sFunc = builder.getInt8(gglCtx->frontStencil.func);
503       else
504          sFunc = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 3), "sFunc");
505    }
506 
507    condBranch.beginLoop(); // while (count > 0)
508 
509    assert(framePtr && gglCtx);
510    // get values
511    Value * frame = NULL;
512    if (GGL_PIXEL_FORMAT_RGBA_8888 == gglCtx->bufferState.colorFormat)
513       frame = builder.CreateLoad(framePtr);
514    else if (GGL_PIXEL_FORMAT_RGB_565 == gglCtx->bufferState.colorFormat) {
515       frame = builder.CreateLoad(framePtr);
516       frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt16Ty(), 0));
517    } else if (GGL_PIXEL_FORMAT_UNKNOWN == gglCtx->bufferState.colorFormat)
518       frame = builder.CreateLoad(framePtr); // color buffer not set yet
519    else
520       assert(0);
521 
522    frame->setName("frame");
523    Value * depth = NULL, * stencil = NULL;
524    if (gglCtx->bufferState.depthTest) {
525       assert(GGL_PIXEL_FORMAT_Z_32 == gglCtx->bufferState.depthFormat);
526       depth = builder.CreateLoad(depthPtr);
527       depth->setName("depth");
528    }
529 
530    Value * count = builder.CreateLoad(countPtr);
531    count->setName("count");
532 
533    Value * cmp = builder.CreateICmpEQ(count, builder.getInt32(0));
534    condBranch.ifCond(cmp, "if_break_loop"); // if (count == 0)
535    condBranch.brk(); // break;
536    condBranch.endif();
537 
538    Value * sCmpPtr = NULL, * sCmp = NULL, * sPtr = NULL, * s = NULL;
539    if (gglCtx->bufferState.stencilTest) {
540       stencil = builder.CreateLoad(stencilPtr);
541       stencil->setName("stencil");
542 
543       // temporaries to load/store value
544       sCmpPtr = builder.CreateAlloca(builder.getInt1Ty());
545       sCmpPtr->setName("sCmpPtr");
546       sPtr = builder.CreateAlloca(byteType);
547       sPtr->setName("sPtr");
548 
549       s = builder.CreateLoad(stencil);
550       s = builder.CreateAnd(s, sMask);
551       builder.CreateStore(s, sPtr);
552 
553       if (gglCtx->frontStencil.func != gglCtx->backStencil.func)
554          condBranch.ifCond(builder.CreateICmpEQ(sFace, builder.getInt8(0)));
555 
556       StencilFunc(builder, gglCtx->frontStencil.func, s, sRef, sCmpPtr);
557 
558       if (gglCtx->frontStencil.func != gglCtx->backStencil.func) {
559          condBranch.elseop();
560          StencilFunc(builder, gglCtx->backStencil.func, s, sRef, sCmpPtr);
561          condBranch.endif();
562       }
563 
564       sCmp = builder.CreateLoad(sCmpPtr);
565    } else
566       sCmp = ConstantInt::getTrue(mod->getContext());
567    sCmp->setName("sCmp");
568 
569    Value * depthZ = NULL, * zPtr = NULL, * z = NULL, * zCmp = NULL;
570    if (gglCtx->bufferState.depthTest) {
571       depthZ  = builder.CreateLoad(depth, "depthZ"); // z stored in buffer
572       zPtr = builder.CreateAlloca(intType); // temp store for modifying incoming z
573       zPtr->setName("zPtr");
574 
575       // modified incoming z
576       z = builder.CreateBitCast(start, intPointerType);
577       z = builder.CreateConstInBoundsGEP1_32(z, (GGL_FS_INPUT_OFFSET +
578                                              GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
579       z = builder.CreateLoad(z, "z");
580 
581       builder.CreateStore(z, zPtr);
582 
583       Value * zNegative = builder.CreateICmpSLT(z, builder.getInt32(0));
584       condBranch.ifCond(zNegative);
585       // if (0x80000000 & z) z ^= 0x7fffffff since smaller -ve float means bigger -ve int
586       z = builder.CreateXor(z, builder.getInt32(0x7fffffff));
587       builder.CreateStore(z, zPtr);
588 
589       condBranch.endif();
590 
591       z = builder.CreateLoad(zPtr, "z");
592 
593       switch (0x200 | gglCtx->bufferState.depthFunc) {
594       case GL_NEVER:
595          zCmp = ConstantInt::getFalse(mod->getContext());
596          break;
597       case GL_LESS:
598          zCmp = builder.CreateICmpSLT(z, depthZ);
599          break;
600       case GL_EQUAL:
601          zCmp = builder.CreateICmpEQ(z, depthZ);
602          break;
603       case GL_LEQUAL:
604          zCmp = builder.CreateICmpSLE(z, depthZ);
605          break;
606       case GL_GREATER:
607          zCmp = builder.CreateICmpSGT(z, depthZ);
608          break;
609       case GL_NOTEQUAL:
610          zCmp = builder.CreateICmpNE(z, depthZ);
611          break;
612       case GL_GEQUAL:
613          zCmp = builder.CreateICmpSGE(z, depthZ);
614          break;
615       case GL_ALWAYS:
616          zCmp = ConstantInt::getTrue(mod->getContext());
617          break;
618       default:
619          assert(0);
620          break;
621       }
622    } else // no depth test means always pass
623       zCmp = ConstantInt::getTrue(mod->getContext());
624    zCmp->setName("zCmp");
625 
626    condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail");
627    condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail");
628 
629    Value * inputs = start;
630    Value * outputs = start;
631 
632    Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start,
633                        offsetof(VertexOutput,fragColor)/sizeof(Vector4));
634 
635    Function * fsFunction = mod->getFunction(shaderName);
636    assert(fsFunction);
637    CallInst *call = builder.CreateCall3(fsFunction,inputs, outputs, constants);
638    call->setCallingConv(CallingConv::C);
639    call->setTailCall(false);
640 
641    Value * dst = Constant::getNullValue(intVecType(builder));
642    if (gglCtx->blendState.enable && (0 != gglCtx->blendState.dcf || 0 != gglCtx->blendState.daf)) {
643       Value * frameColor = builder.CreateLoad(frame, "frameColor");
644       dst = ScreenColorToIntVector(builder, gglCtx->bufferState.colorFormat, frameColor);
645    }
646 
647    Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0);
648    src = builder.CreateLoad(src);
649 
650    Value * color = GenerateFSBlend(gglCtx, gglCtx->bufferState.colorFormat,/*&prog->outputRegDesc,*/ builder, src, dst);
651    builder.CreateStore(color, frame);
652    // TODO DXL depthmask check
653    if (gglCtx->bufferState.depthTest) {
654       z = builder.CreateBitCast(z, intType);
655       builder.CreateStore(z, depth); // store z
656    }
657 
658    if (gglCtx->bufferState.stencilTest)
659       builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dPass,
660                                     gglCtx->backStencil.dPass, sPtr, sRef), stencil);
661 
662    condBranch.elseop(); // failed z test
663 
664    if (gglCtx->bufferState.stencilTest)
665       builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail,
666                                     gglCtx->backStencil.dFail, sPtr, sRef), stencil);
667    condBranch.endif();
668    condBranch.elseop(); // failed s test
669 
670    if (gglCtx->bufferState.stencilTest)
671       builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.sFail,
672                                     gglCtx->backStencil.sFail, sPtr, sRef), stencil);
673 
674    condBranch.endif();
675    assert(frame);
676    frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++
677    // frame may have been casted to short* from int*, so cast back
678    frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt32Ty(), 0));
679    builder.CreateStore(frame, framePtr);
680    if (gglCtx->bufferState.depthTest) {
681       depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++
682       builder.CreateStore(depth, depthPtr);
683    }
684    if (gglCtx->bufferState.stencilTest) {
685       stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++
686       builder.CreateStore(stencil, stencilPtr);
687    }
688    Value * vPtr = NULL, * v = NULL, * dx = NULL;
689    if (program->UsesFragCoord) {
690       vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
691              GGL_FS_INPUT_FRAGCOORD_INDEX);
692       v = builder.CreateLoad(vPtr);
693       dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
694                                               GGL_FS_INPUT_FRAGCOORD_INDEX);
695       dx = builder.CreateLoad(dx);
696       v = builder.CreateFAdd(v, dx);
697       builder.CreateStore(v, vPtr);
698    } else if (gglCtx->bufferState.depthTest) {
699       Type * floatType = builder.getFloatTy();
700       PointerType * floatPointerType = PointerType::get(floatType, 0);
701       vPtr = builder.CreateBitCast(start, floatPointerType);
702       vPtr = builder.CreateConstInBoundsGEP1_32(vPtr,
703              (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
704       v = builder.CreateLoad(vPtr);
705       dx = builder.CreateBitCast(step, floatPointerType);
706       dx = builder.CreateConstInBoundsGEP1_32(dx,
707                                               (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
708       dx = builder.CreateLoad(dx);
709       v = builder.CreateFAdd(v, dx);
710       builder.CreateStore(v, vPtr);
711    }
712 
713    if (program->UsesPointCoord) {
714       vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
715              GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
716       v = builder.CreateLoad(vPtr);
717       dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
718                                               GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
719       dx = builder.CreateLoad(dx);
720       v = builder.CreateFAdd(v, dx);
721       builder.CreateStore(v, vPtr);
722    }
723 
724    for (unsigned i = 0; i < program->VaryingSlots; ++i) {
725       vPtr = builder.CreateConstInBoundsGEP1_32(start, offsetof(VertexOutput,varyings)/sizeof(Vector4) + i);
726       v = builder.CreateLoad(vPtr);
727       dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
728                                               GGL_FS_INPUT_VARYINGS_INDEX + i);
729       dx = builder.CreateLoad(dx);
730       v = builder.CreateFAdd(v, dx);
731       builder.CreateStore(v, vPtr);
732    }
733 
734    count = builder.CreateSub(count, builder.getInt32(1));
735    builder.CreateStore(count, countPtr); // count--;
736 
737    condBranch.endLoop();
738 
739    builder.CreateRetVoid();
740 }
741