1 /**
2 **
3 ** Copyright 2011, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #include "src/pixelflinger2/pixelflinger2.h"
19 #include "src/pixelflinger2/llvm_helper.h"
20 #include "src/mesa/main/mtypes.h"
21
22 #include <llvm/Module.h>
23
24 //#undef ALOGD
25 //#define ALOGD(...)
26
27 using namespace llvm;
28
StencilOp(IRBuilder<> & builder,const unsigned char op,Value * sPtr,Value * sRef)29 static void StencilOp(IRBuilder<> &builder, const unsigned char op,
30 Value * sPtr, Value * sRef)
31 {
32 CondBranch condBranch(builder);
33 Value * s = builder.CreateLoad(sPtr, "stenciOpS");
34 switch (op) {
35 case 0 : // GL_ZERO
36 builder.CreateStore(builder.getInt8(0), sPtr);
37 break;
38 case 1 : // GL_KEEP
39 builder.CreateStore(s, sPtr);
40 break;
41 case 2 : // GL_REPLACE
42 builder.CreateStore(sRef, sPtr);
43 break;
44 case 3 : // GL_INCR
45 condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(255)));
46 builder.CreateStore(s, sPtr);
47 condBranch.elseop();
48 builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
49 condBranch.endif();
50 break;
51 case 4 : // GL_DECR
52 condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(0)));
53 builder.CreateStore(s, sPtr);
54 condBranch.elseop();
55 builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
56 condBranch.endif();
57 break;
58 case 5 : // GL_INVERT
59 builder.CreateStore(builder.CreateNot(s), sPtr);
60 break;
61 case 6 : // GL_INCR_WRAP
62 builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
63 break;
64 case 7 : // GL_DECR_WRAP
65 builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
66 break;
67 default:
68 assert(0);
69 break;
70 }
71 }
72
StencilOp(IRBuilder<> & builder,Value * face,const unsigned char frontOp,const unsigned char backOp,Value * sPtr,Value * sRef)73 static Value * StencilOp(IRBuilder<> & builder, Value * face,
74 const unsigned char frontOp, const unsigned char backOp,
75 Value * sPtr, Value * sRef)
76 {
77 CondBranch condBranch(builder);
78 if (frontOp != backOp)
79 condBranch.ifCond(builder.CreateICmpEQ(face, builder.getInt8(0)));
80
81 StencilOp(builder, frontOp, sPtr, sRef);
82
83 if (frontOp != backOp) {
84 condBranch.elseop();
85 StencilOp(builder, backOp, sPtr, sRef);
86 condBranch.endif();
87 }
88 return builder.CreateLoad(sPtr);
89 }
90
StencilFunc(IRBuilder<> & builder,const unsigned char func,Value * s,Value * sRef,Value * sCmpPtr)91 static void StencilFunc(IRBuilder<> & builder, const unsigned char func,
92 Value * s, Value * sRef, Value * sCmpPtr)
93 {
94 switch (func) {
95 case GL_NEVER & 0x7:
96 builder.CreateStore(builder.getFalse(), sCmpPtr);
97 break;
98 case GL_LESS & 0x7:
99 builder.CreateStore(builder.CreateICmpULT(sRef, s), sCmpPtr);
100 break;
101 case GL_EQUAL & 0x7:
102 builder.CreateStore(builder.CreateICmpEQ(sRef, s), sCmpPtr);
103 break;
104 case GL_LEQUAL & 0x7:
105 builder.CreateStore(builder.CreateICmpULE(sRef, s), sCmpPtr);
106 break;
107 case GL_GREATER & 0x7:
108 builder.CreateStore(builder.CreateICmpUGT(sRef, s), sCmpPtr);
109 break;
110 case GL_NOTEQUAL & 0x7:
111 builder.CreateStore(builder.CreateICmpNE(sRef, s), sCmpPtr);
112 break;
113 case GL_GEQUAL & 0x7:
114 builder.CreateStore(builder.CreateICmpUGE(sRef, s), sCmpPtr);
115 break;
116 case GL_ALWAYS & 0x7:
117 builder.CreateStore(builder.getTrue(), sCmpPtr);
118 break;
119 default:
120 assert(0);
121 break;
122 }
123 }
124
BlendFactor(const unsigned mode,Value * src,Value * dst,Value * constant,Value * one,Value * zero,Value * srcA,Value * dstA,Value * constantA,Value * sOne,const bool isVector,IRBuilder<> & builder)125 static Value * BlendFactor(const unsigned mode, Value * src, Value * dst,
126 Value * constant, Value * one, Value * zero,
127 Value * srcA, Value * dstA, Value * constantA,
128 Value * sOne, const bool isVector, IRBuilder<> & builder)
129 {
130 Value * factor = NULL;
131 switch (mode) {
132 case GGLBlendState::GGL_ZERO:
133 factor = zero;
134 break;
135 case GGLBlendState::GGL_ONE:
136 factor = one;
137 break;
138 case GGLBlendState::GGL_SRC_COLOR:
139 factor = src;
140 break;
141 case GGLBlendState::GGL_ONE_MINUS_SRC_COLOR:
142 factor = builder.CreateSub(one, src);
143 break;
144 case GGLBlendState::GGL_DST_COLOR:
145 factor = dst;
146 break;
147 case GGLBlendState::GGL_ONE_MINUS_DST_COLOR:
148 factor = builder.CreateSub(one, dst);
149 break;
150 case GGLBlendState::GGL_SRC_ALPHA:
151 factor = srcA;
152 if (isVector)
153 factor = intVec(builder, factor, factor, factor, factor);
154 break;
155 case GGLBlendState::GGL_ONE_MINUS_SRC_ALPHA:
156 factor = builder.CreateSub(sOne, srcA);
157 if (isVector)
158 factor = intVec(builder, factor, factor, factor, factor);
159 break;
160 case GGLBlendState::GGL_DST_ALPHA:
161 factor = dstA;
162 if (isVector)
163 factor = intVec(builder, factor, factor, factor, factor);
164 break;
165 case GGLBlendState::GGL_ONE_MINUS_DST_ALPHA:
166 factor = builder.CreateSub(sOne, dstA);
167 if (isVector)
168 factor = intVec(builder, factor, factor, factor, factor);
169 break;
170 case GGLBlendState::GGL_SRC_ALPHA_SATURATE:
171 // valid only for source color and alpha
172 factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA));
173 if (isVector)
174 factor = intVec(builder, factor, factor, factor, sOne);
175 else
176 factor = sOne; // when it's used for source alpha, it's just 1
177 break;
178 case GGLBlendState::GGL_CONSTANT_COLOR:
179 factor = constant;
180 break;
181 case GGLBlendState::GGL_ONE_MINUS_CONSTANT_COLOR:
182 factor = builder.CreateSub(one, constant);
183 break;
184 case GGLBlendState::GGL_CONSTANT_ALPHA:
185 factor = constantA;
186 if (isVector)
187 factor = intVec(builder, factor, factor, factor, factor);
188 break;
189 case GGLBlendState::GGL_ONE_MINUS_CONSTANT_ALPHA:
190 factor = builder.CreateSub(sOne, constantA);
191 if (isVector)
192 factor = intVec(builder, factor, factor, factor, factor);
193 break;
194 default:
195 assert(0);
196 break;
197 }
198 return factor;
199 }
200
Saturate(IRBuilder<> & builder,Value * intVector)201 static Value * Saturate(IRBuilder<> & builder, Value * intVector)
202 {
203 intVector = intVecMax(builder, intVector, constIntVec(builder, 0,0,0,0));
204 return intVecMin(builder, intVector, constIntVec(builder, 255,255,255,255));
205 }
206
207 // src is int32x4 [0,255] rgba vector, and combines them into int32
208 // RGB_565 channel order is weird
IntVectorToScreenColor(IRBuilder<> & builder,const GGLPixelFormat format,Value * src)209 static Value * IntVectorToScreenColor(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
210 {
211 if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
212 src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24));
213 std::vector<Value *> comps = extractVector(builder, src);
214 comps[0] = builder.CreateOr(comps[0], comps[1]);
215 comps[0] = builder.CreateOr(comps[0], comps[2]);
216 comps[0] = builder.CreateOr(comps[0], comps[3]);
217 return comps[0];
218 } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
219 src = builder.CreateAnd(src, constIntVec(builder, 0xf8, 0xfc, 0xf8, 0));
220 std::vector<Value *> comps = extractVector(builder, src);
221 // channel order is weird
222 for (unsigned i = 0; i < 4; i++)
223 comps[i] = builder.CreateTrunc(comps[i], builder.getInt16Ty());
224 comps[2] = builder.CreateLShr(comps[2], 3);
225 comps[1] = builder.CreateShl(comps[1], 3);
226 comps[0] = builder.CreateShl(comps[0], 8);
227
228 comps[0] = builder.CreateOr(comps[0], comps[1]);
229 comps[0] = builder.CreateOr(comps[0], comps[2]);
230 return comps[0];
231 } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
232 return builder.getInt32(0);
233 else
234 assert(0);
235 return NULL;
236 }
237
238 // src is int32 or int16, return is int32x4 [0,255] rgba
239 // RGB_565 channel order is weird
ScreenColorToIntVector(IRBuilder<> & builder,const GGLPixelFormat format,Value * src)240 static Value * ScreenColorToIntVector(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
241 {
242 src = builder.CreateZExt(src, builder.getInt32Ty());
243 Value * dst = intVec(builder, src, src, src, src);
244 if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
245 dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24));
246 dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
247 } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
248 // channel order is weird
249 dst = builder.CreateAnd(dst, constIntVec(builder, 0xf800, 0x7e0, 0x1f, 0));
250 dst = builder.CreateLShr(dst, constIntVec(builder, 8, 3, 0, 0));
251 dst = builder.CreateShl(dst, constIntVec(builder, 0, 0, 3, 0));
252 dst = builder.CreateOr(dst, constIntVec(builder, 0, 0, 0, 0xff));
253 } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
254 ALOGD("pf2: ScreenColorToIntVector GGL_PIXEL_FORMAT_UNKNOWN"); // not set yet, do nothing
255 else
256 assert(0);
257 return dst;
258 }
259
260 // src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer; return is i32
GenerateFSBlend(const GGLState * gglCtx,const GGLPixelFormat format,IRBuilder<> & builder,Value * src,Value * dst)261 Value * GenerateFSBlend(const GGLState * gglCtx, const GGLPixelFormat format, /*const RegDesc * regDesc,*/
262 IRBuilder<> & builder, Value * src, Value * dst)
263 {
264 Type * const intType = builder.getInt32Ty();
265
266 // TODO cast the outputs pointer type to int for writing to minimize bandwidth
267 if (!gglCtx->blendState.enable) {
268 // if (regDesc->IsInt32Color())
269 // {
270 // debug_printf("GenerateFixedFS dst is already scalar fixed0 \n");
271 // src = builder.CreateExtractElement(src, builder.getInt32(0));
272 // src = builder.CreateBitCast(src, intType); // it's already RGBA int32
273 // }
274 // else if (regDesc->IsVectorType(Float))
275 // {
276 src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
277 src = builder.CreateFPToSI(src, intVecType(builder));
278 src = Saturate(builder, src);
279 src = IntVectorToScreenColor(builder, format, src);
280 // }
281 // else if (regDesc->IsVectorType(Fixed8))
282 // {
283 // src = builder.CreateBitCast(src, instr->GetIntVectorType());
284 // src = Saturate(instr, src);
285 // src = IntVectorToColor(instr, storage, src);
286 // }
287 // else if (regDesc->IsVectorType(Fixed16))
288 // {
289 // src = builder.CreateBitCast(src, instr->GetIntVectorType());
290 // src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
291 // src = Saturate(instr, src);
292 // src = IntVectorToColor(instr, storage, src);
293 // }
294 // else
295 // assert(0);
296 return src;
297 }
298 // blending, so convert src to <4 x i32>
299 // if (regDesc->IsInt32Color())
300 // {
301 // src = builder.CreateExtractElement(src, builder.getInt32(0));
302 // src = builder.CreateBitCast(src, intType); // it's already RGBA int32
303 //
304 // Value * channels = Constant::getNullValue(instr->GetIntVectorType());
305 // channels = builder.CreateInsertElement(channels, src, builder.getInt32(0));
306 // channels = builder.CreateInsertElement(channels, src, builder.getInt32(1));
307 // channels = builder.CreateInsertElement(channels, src, builder.getInt32(2));
308 // channels = builder.CreateInsertElement(channels, src, builder.getInt32(3));
309 // channels = builder.CreateLShr(channels, constIntVec(builder,0, 8, 16, 24));
310 // channels = builder.CreateAnd(channels, constIntVec(builder,0xff, 0xff, 0xff, 0xff));
311 // src = channels;
312 // }
313 // else if (regDesc->IsVectorType(Fixed8)) // it's already int32x4 RGBA
314 // src = builder.CreateBitCast(src, instr->GetIntVectorType());
315 // else if (regDesc->IsVectorType(Fixed16))
316 // {
317 // src = builder.CreateBitCast(src, instr->GetIntVectorType());
318 // // TODO DXL consider shl dst by 8 and ashr by 16 in the end for more precision
319 // src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
320 // }
321 // else if (regDesc->IsVectorType(Float))
322 // {
323 src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
324 src = builder.CreateFPToSI(src, intVecType(builder));
325 // }
326 // else
327 // assert(0);
328
329 Value * const one = constIntVec(builder,255,255,255,255);
330 Value * const zero = constIntVec(builder,0,0,0,0);
331 Value * const sOne = builder.getInt32(255);
332 Value * const sZero = builder.getInt32(0);
333
334 #if USE_LLVM_SCANLINE
335 Value * constant = constIntVec(builder,gglCtx->blendState.color[0],
336 gglCtx->blendState.color[1],
337 gglCtx->blendState.color[2],
338 gglCtx->blendState.color[3]);
339 #else
340 Value * constant = NULL;
341 assert(0);
342 #endif
343
344 Value * srcA = extractVector(builder,src)[3];
345 Value * dstA = extractVector(builder,dst)[3];
346 Value * constantA = extractVector(builder,constant)[3];
347
348 Value * sf = BlendFactor(gglCtx->blendState.scf, src, dst,
349 constant, one, zero, srcA, dstA,
350 constantA, sOne, true, builder);
351 if (gglCtx->blendState.scf != gglCtx->blendState.saf) {
352 Value * sfA = BlendFactor(gglCtx->blendState.saf, srcA, dstA,
353 constantA, sOne, sZero, srcA, dstA,
354 constantA, sOne, false, builder);
355 sf = builder.CreateInsertElement(sf, sfA, builder.getInt32(3),
356 name("sfAStore"));
357 }
358
359 Value * df = BlendFactor(gglCtx->blendState.dcf, src, dst,
360 constant, one, zero, srcA, dstA,
361 constantA, sOne, true, builder);
362 if (gglCtx->blendState.dcf != gglCtx->blendState.daf) {
363 Value * dfA = BlendFactor(gglCtx->blendState.daf, srcA, dstA,
364 constantA, sOne, sZero, srcA, dstA,
365 constantA, sOne, false, builder);
366 df = builder.CreateInsertElement(df, dfA, builder.getInt32(3),
367 name("dfAStore"));
368 }
369
370 // this is factor *= 256 / 255; factors have a chance of constant folding
371 sf = builder.CreateAdd(sf, builder.CreateLShr(sf, constIntVec(builder,7,7,7,7)));
372 df = builder.CreateAdd(df, builder.CreateLShr(df, constIntVec(builder,7,7,7,7)));
373
374 src = builder.CreateMul(src, sf);
375 dst = builder.CreateMul(dst, df);
376
377 Value * res = NULL;
378 switch (gglCtx->blendState.ce + GL_FUNC_ADD) {
379 case GL_FUNC_ADD:
380 res = builder.CreateAdd(src, dst);
381 break;
382 case GL_FUNC_SUBTRACT:
383 res = builder.CreateSub(src, dst);
384 break;
385 case GL_FUNC_REVERSE_SUBTRACT:
386 res = builder.CreateSub(dst, src);
387 break;
388 default:
389 assert(0);
390 break;
391 }
392 if (gglCtx->blendState.ce != gglCtx->blendState.ae) {
393 srcA = extractVector(builder,src)[3];
394 dstA = extractVector(builder,dst)[3];
395 Value * resA = NULL;
396 switch (gglCtx->blendState.ae + GL_FUNC_ADD) {
397 case GL_FUNC_ADD:
398 resA = builder.CreateAdd(srcA, dstA);
399 break;
400 case GL_FUNC_SUBTRACT:
401 resA = builder.CreateSub(srcA, dstA);
402 break;
403 case GL_FUNC_REVERSE_SUBTRACT:
404 resA = builder.CreateSub(dstA, srcA);
405 break;
406 default:
407 assert(0);
408 break;
409 }
410 res = builder.CreateInsertElement(res, resA, builder.getInt32(3),
411 name("resAStore"));
412 }
413
414 res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8));
415 res = Saturate(builder, res);
416 res = IntVectorToScreenColor(builder, format, res);
417 return res;
418 }
419
ScanLineFunctionType(IRBuilder<> & builder)420 static FunctionType * ScanLineFunctionType(IRBuilder<> & builder)
421 {
422 std::vector<Type*> funcArgs;
423 VectorType * vectorType = floatVecType(builder);
424 PointerType * vectorPtr = PointerType::get(vectorType, 0);
425 Type * intType = builder.getInt32Ty();
426 PointerType * intPointerType = PointerType::get(intType, 0);
427 PointerType * bytePointerType = PointerType::get(builder.getInt8Ty(), 0);
428
429 funcArgs.push_back(vectorPtr); // start
430 funcArgs.push_back(vectorPtr); // step
431 funcArgs.push_back(vectorPtr); // constants
432 funcArgs.push_back(intPointerType); // frame
433 funcArgs.push_back(intPointerType); // depth
434 funcArgs.push_back(bytePointerType); // stencil
435 funcArgs.push_back(bytePointerType); // stencil state
436 funcArgs.push_back(intType); // count
437
438 FunctionType *functionType = FunctionType::get(/*Result=*/builder.getVoidTy(),
439 llvm::ArrayRef<Type*>(funcArgs),
440 /*isVarArg=*/false);
441
442 return functionType;
443 }
444
445 // generated scanline function parameters are VertexOutput * start, VertexOutput * step,
446 // unsigned * frame, int * depth, unsigned char * stencil,
447 // GGLActiveStencilState * stencilState, unsigned count
GenerateScanLine(const GGLState * gglCtx,const gl_shader_program * program,Module * mod,const char * shaderName,const char * scanlineName)448 void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program, Module * mod,
449 const char * shaderName, const char * scanlineName)
450 {
451 IRBuilder<> builder(mod->getContext());
452 // debug_printf("GenerateScanLine %s \n", scanlineName);
453
454 Type * intType = builder.getInt32Ty();
455 PointerType * intPointerType = PointerType::get(intType, 0);
456 Type * byteType = builder.getInt8Ty();
457 PointerType * bytePointerType = PointerType::get(byteType, 0);
458
459 Function * func = mod->getFunction(scanlineName);
460 if (func)
461 return;
462
463 func = llvm::cast<Function>(mod->getOrInsertFunction(scanlineName,
464 ScanLineFunctionType(builder)));
465
466 BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0);
467 builder.SetInsertPoint(label_entry);
468 CondBranch condBranch(builder);
469
470 Function::arg_iterator args = func->arg_begin();
471 Value * start = args++;
472 start->setName("start");
473 Value * step = args++;
474 step->setName("step");
475 Value * constants = args++;
476 constants->setName("constants");
477
478 // need alloc to be able to assign to it by using store
479 Value * framePtr = builder.CreateAlloca(intPointerType);
480 builder.CreateStore(args++, framePtr);
481 Value * depthPtr = builder.CreateAlloca(intPointerType);
482 builder.CreateStore(args++, depthPtr);
483 Value * stencilPtr = builder.CreateAlloca(bytePointerType);
484 builder.CreateStore(args++, stencilPtr);
485 Value * stencilState = args++;
486 stencilState->setName("stencilState");
487 Value * countPtr = builder.CreateAlloca(intType);
488 builder.CreateStore(args++, countPtr);
489
490 Value * sFace = NULL, * sRef = NULL, *sMask = NULL, * sFunc = NULL;
491 if (gglCtx->bufferState.stencilTest) {
492 sFace = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 0), "sFace");
493 if (gglCtx->frontStencil.ref == gglCtx->backStencil.ref)
494 sRef = builder.getInt8(gglCtx->frontStencil.ref);
495 else
496 sRef = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 1), "sRef");
497 if (gglCtx->frontStencil.mask == gglCtx->backStencil.mask)
498 sMask = builder.getInt8(gglCtx->frontStencil.mask);
499 else
500 sMask = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 2), "sMask");
501 if (gglCtx->frontStencil.func == gglCtx->backStencil.func)
502 sFunc = builder.getInt8(gglCtx->frontStencil.func);
503 else
504 sFunc = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 3), "sFunc");
505 }
506
507 condBranch.beginLoop(); // while (count > 0)
508
509 assert(framePtr && gglCtx);
510 // get values
511 Value * frame = NULL;
512 if (GGL_PIXEL_FORMAT_RGBA_8888 == gglCtx->bufferState.colorFormat)
513 frame = builder.CreateLoad(framePtr);
514 else if (GGL_PIXEL_FORMAT_RGB_565 == gglCtx->bufferState.colorFormat) {
515 frame = builder.CreateLoad(framePtr);
516 frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt16Ty(), 0));
517 } else if (GGL_PIXEL_FORMAT_UNKNOWN == gglCtx->bufferState.colorFormat)
518 frame = builder.CreateLoad(framePtr); // color buffer not set yet
519 else
520 assert(0);
521
522 frame->setName("frame");
523 Value * depth = NULL, * stencil = NULL;
524 if (gglCtx->bufferState.depthTest) {
525 assert(GGL_PIXEL_FORMAT_Z_32 == gglCtx->bufferState.depthFormat);
526 depth = builder.CreateLoad(depthPtr);
527 depth->setName("depth");
528 }
529
530 Value * count = builder.CreateLoad(countPtr);
531 count->setName("count");
532
533 Value * cmp = builder.CreateICmpEQ(count, builder.getInt32(0));
534 condBranch.ifCond(cmp, "if_break_loop"); // if (count == 0)
535 condBranch.brk(); // break;
536 condBranch.endif();
537
538 Value * sCmpPtr = NULL, * sCmp = NULL, * sPtr = NULL, * s = NULL;
539 if (gglCtx->bufferState.stencilTest) {
540 stencil = builder.CreateLoad(stencilPtr);
541 stencil->setName("stencil");
542
543 // temporaries to load/store value
544 sCmpPtr = builder.CreateAlloca(builder.getInt1Ty());
545 sCmpPtr->setName("sCmpPtr");
546 sPtr = builder.CreateAlloca(byteType);
547 sPtr->setName("sPtr");
548
549 s = builder.CreateLoad(stencil);
550 s = builder.CreateAnd(s, sMask);
551 builder.CreateStore(s, sPtr);
552
553 if (gglCtx->frontStencil.func != gglCtx->backStencil.func)
554 condBranch.ifCond(builder.CreateICmpEQ(sFace, builder.getInt8(0)));
555
556 StencilFunc(builder, gglCtx->frontStencil.func, s, sRef, sCmpPtr);
557
558 if (gglCtx->frontStencil.func != gglCtx->backStencil.func) {
559 condBranch.elseop();
560 StencilFunc(builder, gglCtx->backStencil.func, s, sRef, sCmpPtr);
561 condBranch.endif();
562 }
563
564 sCmp = builder.CreateLoad(sCmpPtr);
565 } else
566 sCmp = ConstantInt::getTrue(mod->getContext());
567 sCmp->setName("sCmp");
568
569 Value * depthZ = NULL, * zPtr = NULL, * z = NULL, * zCmp = NULL;
570 if (gglCtx->bufferState.depthTest) {
571 depthZ = builder.CreateLoad(depth, "depthZ"); // z stored in buffer
572 zPtr = builder.CreateAlloca(intType); // temp store for modifying incoming z
573 zPtr->setName("zPtr");
574
575 // modified incoming z
576 z = builder.CreateBitCast(start, intPointerType);
577 z = builder.CreateConstInBoundsGEP1_32(z, (GGL_FS_INPUT_OFFSET +
578 GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
579 z = builder.CreateLoad(z, "z");
580
581 builder.CreateStore(z, zPtr);
582
583 Value * zNegative = builder.CreateICmpSLT(z, builder.getInt32(0));
584 condBranch.ifCond(zNegative);
585 // if (0x80000000 & z) z ^= 0x7fffffff since smaller -ve float means bigger -ve int
586 z = builder.CreateXor(z, builder.getInt32(0x7fffffff));
587 builder.CreateStore(z, zPtr);
588
589 condBranch.endif();
590
591 z = builder.CreateLoad(zPtr, "z");
592
593 switch (0x200 | gglCtx->bufferState.depthFunc) {
594 case GL_NEVER:
595 zCmp = ConstantInt::getFalse(mod->getContext());
596 break;
597 case GL_LESS:
598 zCmp = builder.CreateICmpSLT(z, depthZ);
599 break;
600 case GL_EQUAL:
601 zCmp = builder.CreateICmpEQ(z, depthZ);
602 break;
603 case GL_LEQUAL:
604 zCmp = builder.CreateICmpSLE(z, depthZ);
605 break;
606 case GL_GREATER:
607 zCmp = builder.CreateICmpSGT(z, depthZ);
608 break;
609 case GL_NOTEQUAL:
610 zCmp = builder.CreateICmpNE(z, depthZ);
611 break;
612 case GL_GEQUAL:
613 zCmp = builder.CreateICmpSGE(z, depthZ);
614 break;
615 case GL_ALWAYS:
616 zCmp = ConstantInt::getTrue(mod->getContext());
617 break;
618 default:
619 assert(0);
620 break;
621 }
622 } else // no depth test means always pass
623 zCmp = ConstantInt::getTrue(mod->getContext());
624 zCmp->setName("zCmp");
625
626 condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail");
627 condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail");
628
629 Value * inputs = start;
630 Value * outputs = start;
631
632 Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start,
633 offsetof(VertexOutput,fragColor)/sizeof(Vector4));
634
635 Function * fsFunction = mod->getFunction(shaderName);
636 assert(fsFunction);
637 CallInst *call = builder.CreateCall3(fsFunction,inputs, outputs, constants);
638 call->setCallingConv(CallingConv::C);
639 call->setTailCall(false);
640
641 Value * dst = Constant::getNullValue(intVecType(builder));
642 if (gglCtx->blendState.enable && (0 != gglCtx->blendState.dcf || 0 != gglCtx->blendState.daf)) {
643 Value * frameColor = builder.CreateLoad(frame, "frameColor");
644 dst = ScreenColorToIntVector(builder, gglCtx->bufferState.colorFormat, frameColor);
645 }
646
647 Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0);
648 src = builder.CreateLoad(src);
649
650 Value * color = GenerateFSBlend(gglCtx, gglCtx->bufferState.colorFormat,/*&prog->outputRegDesc,*/ builder, src, dst);
651 builder.CreateStore(color, frame);
652 // TODO DXL depthmask check
653 if (gglCtx->bufferState.depthTest) {
654 z = builder.CreateBitCast(z, intType);
655 builder.CreateStore(z, depth); // store z
656 }
657
658 if (gglCtx->bufferState.stencilTest)
659 builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dPass,
660 gglCtx->backStencil.dPass, sPtr, sRef), stencil);
661
662 condBranch.elseop(); // failed z test
663
664 if (gglCtx->bufferState.stencilTest)
665 builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail,
666 gglCtx->backStencil.dFail, sPtr, sRef), stencil);
667 condBranch.endif();
668 condBranch.elseop(); // failed s test
669
670 if (gglCtx->bufferState.stencilTest)
671 builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.sFail,
672 gglCtx->backStencil.sFail, sPtr, sRef), stencil);
673
674 condBranch.endif();
675 assert(frame);
676 frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++
677 // frame may have been casted to short* from int*, so cast back
678 frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt32Ty(), 0));
679 builder.CreateStore(frame, framePtr);
680 if (gglCtx->bufferState.depthTest) {
681 depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++
682 builder.CreateStore(depth, depthPtr);
683 }
684 if (gglCtx->bufferState.stencilTest) {
685 stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++
686 builder.CreateStore(stencil, stencilPtr);
687 }
688 Value * vPtr = NULL, * v = NULL, * dx = NULL;
689 if (program->UsesFragCoord) {
690 vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
691 GGL_FS_INPUT_FRAGCOORD_INDEX);
692 v = builder.CreateLoad(vPtr);
693 dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
694 GGL_FS_INPUT_FRAGCOORD_INDEX);
695 dx = builder.CreateLoad(dx);
696 v = builder.CreateFAdd(v, dx);
697 builder.CreateStore(v, vPtr);
698 } else if (gglCtx->bufferState.depthTest) {
699 Type * floatType = builder.getFloatTy();
700 PointerType * floatPointerType = PointerType::get(floatType, 0);
701 vPtr = builder.CreateBitCast(start, floatPointerType);
702 vPtr = builder.CreateConstInBoundsGEP1_32(vPtr,
703 (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
704 v = builder.CreateLoad(vPtr);
705 dx = builder.CreateBitCast(step, floatPointerType);
706 dx = builder.CreateConstInBoundsGEP1_32(dx,
707 (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
708 dx = builder.CreateLoad(dx);
709 v = builder.CreateFAdd(v, dx);
710 builder.CreateStore(v, vPtr);
711 }
712
713 if (program->UsesPointCoord) {
714 vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
715 GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
716 v = builder.CreateLoad(vPtr);
717 dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
718 GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
719 dx = builder.CreateLoad(dx);
720 v = builder.CreateFAdd(v, dx);
721 builder.CreateStore(v, vPtr);
722 }
723
724 for (unsigned i = 0; i < program->VaryingSlots; ++i) {
725 vPtr = builder.CreateConstInBoundsGEP1_32(start, offsetof(VertexOutput,varyings)/sizeof(Vector4) + i);
726 v = builder.CreateLoad(vPtr);
727 dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
728 GGL_FS_INPUT_VARYINGS_INDEX + i);
729 dx = builder.CreateLoad(dx);
730 v = builder.CreateFAdd(v, dx);
731 builder.CreateStore(v, vPtr);
732 }
733
734 count = builder.CreateSub(count, builder.getInt32(1));
735 builder.CreateStore(count, countPtr); // count--;
736
737 condBranch.endLoop();
738
739 builder.CreateRetVoid();
740 }
741