1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2010 Luca Barbieri
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file ir_to_llvm.cpp
29 *
30 * Translates the IR to LLVM
31 */
32
33 /* this tends to get set as part of LLVM_CFLAGS, but we definitely want asserts */
34 #ifdef NDEBUG
35 #undef NDEBUG
36 #endif
37
38 #include "llvm/ADT/ArrayRef.h"
39 #include "llvm/DerivedTypes.h"
40 #include "llvm/LLVMContext.h"
41 #include "llvm/Module.h"
42 #include "llvm/Analysis/Verifier.h"
43 #include "llvm/Support/IRBuilder.h"
44 //#include "llvm/Intrinsics.h"
45
46 #include <vector>
47 #include <stdio.h>
48 #include <map>
49 /*
50 #ifdef _MSC_VER
51 #include <unordered_map>
52 #else
53 #include <tr1/unordered_map>
54 #endif
55 // use C++0x/Microsoft convention
56 namespace std
57 {
58 using namespace tr1;
59 }
60 //*/
61
62 #include "ir.h"
63 #include "ir_visitor.h"
64 #include "glsl_types.h"
65 #include "src/mesa/main/mtypes.h"
66
67 // Helper function to convert array to llvm::ArrayRef
68 template <typename T, size_t N>
pack(T const (& array)[N])69 static inline llvm::ArrayRef<T> pack(T const (&array)[N]) {
70 return llvm::ArrayRef<T>(array);
71 }
72
73 // Helper function to convert pointer + size to llvm::ArrayRef
74 template <typename T>
pack(T const * ptr,size_t n)75 static inline llvm::ArrayRef<T> pack(T const *ptr, size_t n) {
76 return llvm::ArrayRef<T>(ptr, n);
77 }
78
79 struct GGLState;
80
81 llvm::Value * tex2D(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler,
82 const GGLState * gglCtx);
83 llvm::Value * texCube(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler,
84 const GGLState * gglCtx);
85
86 class ir_to_llvm_visitor : public ir_visitor {
87 ir_to_llvm_visitor();
88 public:
89
90
91 llvm::LLVMContext& ctx;
92 llvm::Module* mod;
93 llvm::Function* fun;
94 // could easily support more loops, but GLSL doesn't support multiloop break/continue
95 std::pair<llvm::BasicBlock*, llvm::BasicBlock*> loop;
96 llvm::BasicBlock* bb;
97 llvm::Value* result;
98 llvm::IRBuilder<> bld;
99
100 const GGLState * gglCtx;
101 const char * shaderSuffix;
102 llvm::Value * inputsPtr, * outputsPtr, * constantsPtr; // internal globals to store inputs/outputs/constants pointers
103 llvm::Value * inputs, * outputs, * constants;
104
/**
 * Builds a visitor that emits into \p p_mod.
 *
 * Besides initialising the members, this creates three internal-linkage
 * module globals ("gl_inputPtr", "gl_outputsPtr", "gl_constantsPtr"), each a
 * null-initialised pointer to <4 x float>.
 *
 * \param p_mod   module to emit into; its context becomes \c ctx
 * \param GGLCtx  stored in \c gglCtx
 * \param suffix  stored in \c shaderSuffix
 */
ir_to_llvm_visitor(llvm::Module* p_mod, const GGLState * GGLCtx, const char * suffix)
   : ctx(p_mod->getContext()),
     mod(p_mod),
     fun(0),
     loop(static_cast<llvm::BasicBlock*>(0), static_cast<llvm::BasicBlock*>(0)),
     bb(0),
     bld(ctx),
     gglCtx(GGLCtx),
     shaderSuffix(suffix),
     inputsPtr(NULL),
     outputsPtr(NULL),
     constantsPtr(NULL),
     inputs(NULL),
     outputs(NULL),
     constants(NULL)
{
   llvm::Type * const vec4Ty = llvm::VectorType::get(bld.getFloatTy(), 4);
   llvm::PointerType * const vec4PtrTy = llvm::PointerType::get(vec4Ty, 0);
   llvm::Constant * const nullVec4Ptr = llvm::Constant::getNullValue(vec4PtrTy);

   // make input, output and consts global pointers so they can be used in
   // different LLVM functions since the shader shares these "registers"
   // across "functions"
   inputsPtr = new llvm::GlobalVariable(*mod, vec4PtrTy, false,
                                        llvm::GlobalValue::InternalLinkage,
                                        nullVec4Ptr, "gl_inputPtr");
   outputsPtr = new llvm::GlobalVariable(*mod, vec4PtrTy, false,
                                         llvm::GlobalValue::InternalLinkage,
                                         nullVec4Ptr, "gl_outputsPtr");
   constantsPtr = new llvm::GlobalVariable(*mod, vec4PtrTy, false,
                                           llvm::GlobalValue::InternalLinkage,
                                           nullVec4Ptr, "gl_constantsPtr");
}
125
llvm_base_type(unsigned base_type)126 llvm::Type* llvm_base_type(unsigned base_type)
127 {
128 switch(base_type)
129 {
130 case GLSL_TYPE_VOID:
131 return llvm::Type::getVoidTy(ctx);
132 case GLSL_TYPE_UINT:
133 case GLSL_TYPE_INT:
134 return llvm::Type::getInt32Ty(ctx);
135 case GLSL_TYPE_FLOAT:
136 return llvm::Type::getFloatTy(ctx);
137 case GLSL_TYPE_BOOL:
138 return llvm::Type::getInt1Ty(ctx);
139 case GLSL_TYPE_SAMPLER:
140 return llvm::PointerType::getUnqual(llvm::Type::getVoidTy(ctx));
141 default:
142 assert(0);
143 return 0;
144 }
145 }
146
llvm_vec_type(const glsl_type * type)147 llvm::Type* llvm_vec_type(const glsl_type* type)
148 {
149 if (type->is_array())
150 return llvm::ArrayType::get(llvm_type(type->fields.array), type->array_size());
151
152 if (type->is_record())
153 {
154 std::vector<llvm::Type*> fields;
155 for (unsigned i = 0; i < type->length; i++)
156 fields.push_back(llvm_type(type->fields.structure[i].type));
157 return llvm::StructType::get(ctx, llvm::ArrayRef<llvm::Type*>(
158 fields));
159 }
160
161 llvm::Type* base_type = llvm_base_type(type->base_type);
162 if (type->vector_elements <= 1) {
163 return base_type;
164 } else {
165 return llvm::VectorType::get(base_type, type->vector_elements);
166 }
167 }
168
llvm_type(const glsl_type * type)169 llvm::Type* llvm_type(const glsl_type* type)
170 {
171 llvm::Type* vec_type = llvm_vec_type(type);
172 if (type->matrix_columns <= 1) {
173 return vec_type;
174 } else {
175 return llvm::ArrayType::get(vec_type, type->matrix_columns);
176 }
177 }
178
179 typedef std::map<ir_variable*, llvm::Value*> llvm_variables_t;
180 //typedef std::unordered_map<ir_variable*, llvm::Value*> llvm_variables_t;
181 llvm_variables_t llvm_variables;
182
llvm_variable(class ir_variable * var)183 llvm::Value* llvm_variable(class ir_variable* var)
184 {
185 llvm_variables_t::iterator vari = llvm_variables.find(var);
186 if (vari != llvm_variables.end()) {
187 return vari->second;
188 } else {
189 llvm::Type* type = llvm_type(var->type);
190
191 llvm::Value* v = NULL;
192 if(fun) {
193 if (ir_var_in == var->mode)
194 {
195 assert(var->location >= 0);
196 v = bld.CreateConstGEP1_32(inputs, var->location);
197 v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
198 }
199 else if (ir_var_out == var->mode)
200 {
201 assert(var->location >= 0);
202 v = bld.CreateConstGEP1_32(outputs, var->location);
203 v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
204 }
205 else if (ir_var_uniform == var->mode)
206 {
207 assert(var->location >= 0);
208 v = bld.CreateConstGEP1_32(constants, var->location);
209 v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
210 }
211 else
212 {
213 if(bb == &fun->getEntryBlock())
214 v = bld.CreateAlloca(type, 0, var->name);
215 else
216 v = new llvm::AllocaInst(type, 0, var->name, fun->getEntryBlock().getTerminator());
217 }
218 } else {
219 // TODO: can anything global be non-constant in GLSL?; fix linkage
220 //printf("var '%s' mode=%d location=%d \n", var->name, var->mode, var->location);
221 switch(var->mode)
222 {
223 case ir_var_auto: // fall through
224 case ir_var_temporary:
225 {
226 llvm::Constant * init = llvm::UndefValue::get(llvm_type(var->type));
227 if(var->constant_value)
228 init = llvm_constant(var->constant_value);
229 v = new llvm::GlobalVariable(*mod, type, var->read_only, llvm::GlobalValue::InternalLinkage, init, var->name);
230 break;
231 }
232 case ir_var_in: // fall through
233 case ir_var_out: // fall through
234 case ir_var_uniform: // fall through
235 assert(var->location >= 0);
236 return NULL; // variable outside of function means declaration
237 default:
238 assert(0);
239 }
240
241 // llvm::Function::LinkageTypes linkage;
242 // if(var->mode == ir_var_auto || var->mode == ir_var_temporary)
243 // linkage = llvm::GlobalValue::InternalLinkage;
244 // else
245 // linkage = llvm::GlobalValue::ExternalLinkage;
246 // llvm::Constant* init = 0;
247 // if(var->constant_value)
248 // {
249 // init = llvm_constant(var->constant_value);
250 // // this constants need to be external (ie. written to output)
251 // if (llvm::GlobalValue::ExternalLinkage == linkage)
252 // linkage = llvm::GlobalValue::AvailableExternallyLinkage;
253 // }
254 // else if(linkage == llvm::GlobalValue::InternalLinkage)
255 // init = llvm::UndefValue::get(llvm_type(var->type));
256 // v = new llvm::GlobalVariable(*mod, type, var->read_only, linkage, init, var->name);
257 }
258 assert(v);
259 llvm_variables[var] = v;
260 return v;
261 }
262 }
263
264 //typedef std::map<ir_function_signature*, llvm::Function*> llvm_functions_t;
265 //typedef std::unordered_map<ir_function_signature*, llvm::Function*> llvm_functions_t;
266 //llvm_functions_t llvm_functions;
267
llvm_function(class ir_function_signature * sig)268 llvm::Function* llvm_function(class ir_function_signature* sig)
269 {
270 const char* name = sig->function_name();
271 char * functionName = (char *)malloc(strlen(name) + strlen(shaderSuffix) + 1);
272 strcpy(functionName, name);
273 strcat(functionName, shaderSuffix);
274 llvm::Function * function = mod->getFunction(functionName);
275 if (function)
276 {
277 free(functionName);
278 return function;
279 }
280 else
281 {
282 llvm::Function::LinkageTypes linkage;
283 std::vector<llvm::Type*> params;
284 foreach_iter(exec_list_iterator, iter, sig->parameters) {
285 ir_variable* arg = (ir_variable*)iter.get();
286 params.push_back(llvm_type(arg->type));
287 }
288
289 if(!strcmp(name, "main") || !sig->is_defined)
290 {
291 linkage = llvm::Function::ExternalLinkage;
292 llvm::PointerType * vecPtrTy = llvm::PointerType::get(llvm::VectorType::get(bld.getFloatTy(), 4), 0);
293 assert(0 == params.size());
294 params.push_back(vecPtrTy); // inputs
295 params.push_back(vecPtrTy); // outputs
296 params.push_back(vecPtrTy); // constants
297 }
298 else {
299 linkage = llvm::Function::InternalLinkage;
300 }
301 llvm::FunctionType* ft = llvm::FunctionType::get(llvm_type(sig->return_type),
302 llvm::ArrayRef<llvm::Type*>(params),
303 false);
304 function = llvm::Function::Create(ft, linkage, functionName, mod);
305 free(functionName);
306 return function;
307 }
308 }
309
llvm_value(class ir_instruction * ir)310 llvm::Value* llvm_value(class ir_instruction* ir)
311 {
312 result = 0;
313 ir->accept(this);
314 return result;
315 }
316
llvm_constant(class ir_instruction * ir)317 llvm::Constant* llvm_constant(class ir_instruction* ir)
318 {
319 return (llvm::Constant *)llvm_value(ir);
320 //return &dynamic_cast<llvm::Constant&>(*llvm_value(ir));
321 }
322
llvm_int(unsigned v)323 llvm::Constant* llvm_int(unsigned v)
324 {
325 return llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), v);
326 }
327
llvm_pointer(class ir_rvalue * ir)328 llvm::Value* llvm_pointer(class ir_rvalue* ir)
329 {
330 if(ir_dereference_variable* deref = ir->as_dereference_variable())
331 return llvm_variable(deref->variable_referenced());
332 else if(ir_dereference_array* deref = ir->as_dereference_array())
333 {
334 llvm::Value* gep[2] = {llvm_int(0), llvm_value(deref->array_index)};
335 return bld.CreateInBoundsGEP(llvm_pointer(deref->array), gep);
336 }
337 else if(ir->as_dereference())
338 {
339 ir_dereference_record* deref = (ir_dereference_record*)ir;
340 int idx = deref->record->type->field_index(deref->field);
341 assert(idx >= 0);
342 return bld.CreateConstInBoundsGEP2_32(llvm_pointer(deref->record), 0, idx);
343 }
344 else
345 {
346 assert(0);
347 return 0;
348 }
349 }
350
351 // llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a)
352 // {
353 // llvm::Type* types[1] = {a->getType()};
354 // return bld.CreateCall(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a);
355 // }
356 //
357 // llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a, llvm::Value* b)
358 // {
359 // llvm::Type* types[2] = {a->getType(), b->getType()};
360 // /* only one type suffix is usually needed, so pass 1 here */
361 // return bld.CreateCall2(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a, b);
362 // }
363
llvm_intrinsic_unop(ir_expression_operation op,llvm::Value * op0)364 llvm::Value* llvm_intrinsic_unop(ir_expression_operation op, llvm::Value * op0)
365 {
366 llvm::Type * floatType = llvm::Type::getFloatTy(ctx);
367 const char * name = NULL;
368 switch (op) {
369 case ir_unop_sin:
370 name = "sinf";
371 break;
372 case ir_unop_cos:
373 name = "cosf";
374 break;
375 default:
376 assert(0);
377 }
378
379 llvm::Function * function = mod->getFunction(name);
380 if (!function) {
381 // predeclare the intrinsic
382 std::vector<llvm::Type*> args;
383 args.push_back(floatType);
384 llvm::FunctionType* type = llvm::FunctionType::get(floatType,
385 llvm::ArrayRef<llvm::Type*>(args),
386 false);
387 function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod);
388 function->setCallingConv(llvm::CallingConv::C);
389 }
390
391 return bld.CreateCall(function, op0);
392 }
393
llvm_intrinsic_binop(ir_expression_operation op,llvm::Value * op0,llvm::Value * op1)394 llvm::Value* llvm_intrinsic_binop(ir_expression_operation op, llvm::Value * op0, llvm::Value * op1)
395 {
396 llvm::Type * floatType = llvm::Type::getFloatTy(ctx);
397 const char * name = NULL;
398 switch (op) {
399 case ir_binop_pow:
400 name = "powf";
401 break;
402 default:
403 assert(0);
404 }
405
406 llvm::Function * function = mod->getFunction(name);
407 if (!function) {
408 // predeclare the intrinsic
409 std::vector<llvm::Type*> args;
410 args.push_back(floatType);
411 args.push_back(floatType);
412 llvm::FunctionType* type = llvm::FunctionType::get(floatType,
413 llvm::ArrayRef<llvm::Type*>(args),
414 false);
415 function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod);
416 function->setCallingConv(llvm::CallingConv::C);
417 }
418
419 return bld.CreateCall2(function, op0, op1);
420 }
421
llvm_imm(llvm::Type * type,double v)422 llvm::Constant* llvm_imm(llvm::Type* type, double v)
423 {
424 if(type->isVectorTy())
425 {
426 std::vector<llvm::Constant*> values;
427 values.push_back(llvm_imm(((llvm::VectorType*)type)->getElementType(), v));
428 for(unsigned i = 1; i < ((llvm::VectorType*)type)->getNumElements(); ++i)
429 values.push_back(values[0]);
430 return llvm::ConstantVector::get(values);
431 }
432 else if(type->isIntegerTy())
433 return llvm::ConstantInt::get(type, v);
434 else if(type->isFloatingPointTy())
435 return llvm::ConstantFP::get(type, v);
436 else
437 {
438 assert(0);
439 return 0;
440 }
441 }
442
create_shuffle3(llvm::IRBuilder<> & bld,llvm::Value * v,unsigned a,unsigned b,unsigned c,const llvm::Twine & name="")443 static llvm::Value* create_shuffle3(llvm::IRBuilder<>& bld, llvm::Value* v, unsigned a, unsigned b, unsigned c, const llvm::Twine& name = "")
444 {
445 llvm::Type* int_ty = llvm::Type::getInt32Ty(v->getContext());
446 llvm::Constant* vals[3] = {llvm::ConstantInt::get(int_ty, a), llvm::ConstantInt::get(int_ty, b), llvm::ConstantInt::get(int_ty, c)};
447 return bld.CreateShuffleVector(v, llvm::UndefValue::get(v->getType()), llvm::ConstantVector::get(pack(vals)), name);
448 }
449
create_select(unsigned width,llvm::Value * cond,llvm::Value * tru,llvm::Value * fal,const char * name="")450 llvm::Value* create_select(unsigned width, llvm::Value * cond, llvm::Value * tru, llvm::Value * fal, const char * name = "")
451 {
452 if (1 == width)
453 return bld.CreateSelect(cond, tru, fal, name);
454
455 llvm::Type * vectorType = tru->getType();
456 llvm::Value * vector = llvm::Constant::getNullValue(vectorType);
457 for (unsigned int i = 0; i < width; i++) {
458 llvm::Value * c = bld.CreateExtractElement(cond, llvm_int(i));
459 llvm::Value * t = bld.CreateExtractElement(tru, llvm_int(i));
460 llvm::Value * f = bld.CreateExtractElement(fal, llvm_int(i));
461 llvm::Value * v = bld.CreateSelect(c, t, f, name);
462 vector = bld.CreateInsertElement(vector, v, llvm_int(i), "vslct");
463 }
464 return vector;
465 }
466
create_dot_product(llvm::Value * ops0,llvm::Value * ops1,glsl_base_type type,unsigned width)467 llvm::Value* create_dot_product(llvm::Value* ops0, llvm::Value* ops1, glsl_base_type type, unsigned width)
468 {
469 llvm::Value* prod;
470 switch (type) {
471 case GLSL_TYPE_UINT:
472 case GLSL_TYPE_INT:
473 prod = bld.CreateMul(ops0, ops1, "dot.mul");
474 break;
475 case GLSL_TYPE_FLOAT:
476 prod = bld.CreateFMul(ops0, ops1, "dot.mul");
477 break;
478 default:
479 assert(0);
480 }
481
482 if (width<= 1)
483 return prod;
484
485 llvm::Value* sum = 0;
486 for (unsigned i = 0; i < width; ++i) {
487 llvm::Value* elem = bld.CreateExtractElement(prod, llvm_int(i), "dot.elem");
488 if (sum) {
489 if (type == GLSL_TYPE_FLOAT)
490 sum = bld.CreateFAdd(sum, elem, "dot.add");
491 else
492 sum = bld.CreateAdd(sum, elem, "dot.add");
493 }
494 else
495 sum = elem;
496 }
497 return sum;
498 }
499
llvm_expression(ir_expression * ir)500 llvm::Value* llvm_expression(ir_expression* ir)
501 {
502 llvm::Value* ops[2];
503 for(unsigned i = 0; i < ir->get_num_operands(); ++i)
504 ops[i] = llvm_value(ir->operands[i]);
505
506 if(ir->get_num_operands() == 2)
507 {
508 int vecidx = -1;
509 int scaidx = -1;
510 if(ir->operands[0]->type->vector_elements <= 1 && ir->operands[1]->type->vector_elements > 1)
511 {
512 scaidx = 0;
513 vecidx = 1;
514 }
515 else if(ir->operands[0]->type->vector_elements > 1 && ir->operands[1]->type->vector_elements <= 1)
516 {
517 scaidx = 1;
518 vecidx = 0;
519 }
520 else
521 assert(ir->operands[0]->type->vector_elements == ir->operands[1]->type->vector_elements);
522
523 if(scaidx >= 0)
524 {
525 llvm::Value* vec;
526 vec = llvm::UndefValue::get(ops[vecidx]->getType());
527 for(unsigned i = 0; i < ir->operands[vecidx]->type->vector_elements; ++i)
528 vec = bld.CreateInsertElement(vec, ops[scaidx], llvm_int(i), "sca2vec");
529 ops[scaidx] = vec;
530 }
531 }
532
533 switch (ir->operation) {
534 case ir_unop_logic_not:
535 return bld.CreateNot(ops[0]);
536 case ir_unop_neg:
537 switch (ir->operands[0]->type->base_type) {
538 case GLSL_TYPE_UINT:
539 case GLSL_TYPE_BOOL:
540 case GLSL_TYPE_INT:
541 return bld.CreateNeg(ops[0]);
542 case GLSL_TYPE_FLOAT:
543 return bld.CreateFNeg(ops[0]);
544 default:
545 assert(0);
546 }
547 case ir_unop_abs:
548 switch (ir->operands[0]->type->base_type) {
549 case GLSL_TYPE_UINT:
550 case GLSL_TYPE_BOOL:
551 return ops[0];
552 case GLSL_TYPE_INT:
553 return create_select(ir->operands[0]->type->vector_elements,
554 bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "sabs.ge"),
555 ops[0], bld.CreateNeg(ops[0], "sabs.neg"), "sabs.select");
556 case GLSL_TYPE_FLOAT:
557 return create_select(ir->operands[0]->type->vector_elements,
558 bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fabs.ge"),
559 ops[0], bld.CreateFNeg(ops[0], "fabs.neg"), "fabs.select");
560 default:
561 assert(0);
562 }
563 case ir_unop_sign:
564 switch (ir->operands[0]->type->base_type) {
565 case GLSL_TYPE_BOOL:
566 return ops[0];
567 case GLSL_TYPE_UINT:
568 return bld.CreateZExt(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "usign.ne"), ops[0]->getType(), "usign.zext");
569 case GLSL_TYPE_INT:
570 return bld.CreateSelect(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ne"),
571 bld.CreateSelect(bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "sabs.selects"),
572 llvm_imm(ops[0]->getType(), 0), "sabs.select0");
573 case GLSL_TYPE_FLOAT:
574 return bld.CreateSelect(bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ne"),
575 bld.CreateSelect(bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "fabs.selects"),
576 llvm_imm(ops[0]->getType(), 0), "fabs.select0");
577 default:
578 assert(0);
579 }
580 case ir_unop_rcp:
581 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
582 return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), ops[0]);
583 case ir_unop_exp: // fall through
584 case ir_unop_exp2: // fall through
585 case ir_unop_log: // fall through
586 case ir_unop_log2: // fall through
587 case ir_unop_sin: // fall through
588 case ir_unop_cos:
589 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
590 return llvm_intrinsic_unop(ir->operation, ops[0]);
591 // TODO: implement these somehow
592 case ir_unop_dFdx:
593 assert(0);
594 //return llvm_intrinsic(llvm::Intrinsic::ddx, ops[0]);
595 case ir_unop_dFdy:
596 assert(0);
597 //return llvm_intrinsic(llvm::Intrinsic::ddy, ops[0]);
598 case ir_binop_add:
599 switch(ir->operands[0]->type->base_type)
600 {
601 case GLSL_TYPE_BOOL:
602 case GLSL_TYPE_UINT:
603 case GLSL_TYPE_INT:
604 return bld.CreateAdd(ops[0], ops[1]);
605 case GLSL_TYPE_FLOAT:
606 return bld.CreateFAdd(ops[0], ops[1]);
607 default:
608 assert(0);
609 }
610 case ir_binop_sub:
611 switch(ir->operands[0]->type->base_type)
612 {
613 case GLSL_TYPE_BOOL:
614 case GLSL_TYPE_UINT:
615 case GLSL_TYPE_INT:
616 return bld.CreateSub(ops[0], ops[1]);
617 case GLSL_TYPE_FLOAT:
618 return bld.CreateFSub(ops[0], ops[1]);
619 default:
620 assert(0);
621 }
622 case ir_binop_mul:
623 if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_vector())
624 assert(0);
625 else if (ir->operands[0]->type->is_vector() && ir->operands[1]->type->is_matrix()) {
626 assert(0); // matrix multiplication should have been lowered to vector ops
627 llvm::VectorType * vectorType = llvm::VectorType::get(llvm_base_type(ir->operands[1]->type->base_type), ir->operands[1]->type->matrix_columns);
628 llvm::Value * vector = llvm::Constant::getNullValue(vectorType);
629 for (unsigned int i = 0; i < ir->operands[1]->type->matrix_columns; i++) {
630 llvm::Value * value = bld.CreateExtractValue(ops[1], i, "vec*mat_col");
631 value = create_dot_product(value, ops[0], ir->operands[1]->type->base_type, ir->operands[1]->type->vector_elements);
632 vector = bld.CreateInsertElement(vector, value, llvm_int(i), "vec*mat_res");
633 }
634 return vector;
635 }
636 else if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_matrix())
637 assert(0);
638
639 switch (ir->operands[0]->type->base_type) {
640 case GLSL_TYPE_BOOL:
641 return bld.CreateAnd(ops[0], ops[1]);
642 case GLSL_TYPE_UINT:
643 case GLSL_TYPE_INT:
644 return bld.CreateMul(ops[0], ops[1]);
645 case GLSL_TYPE_FLOAT:
646 return bld.CreateFMul(ops[0], ops[1]);
647 default:
648 assert(0);
649 }
650 case ir_binop_div:
651 switch(ir->operands[0]->type->base_type)
652 {
653 case GLSL_TYPE_BOOL:
654 case GLSL_TYPE_UINT:
655 return bld.CreateUDiv(ops[0], ops[1]);
656 case GLSL_TYPE_INT:
657 return bld.CreateSDiv(ops[0], ops[1]);
658 case GLSL_TYPE_FLOAT:
659 return bld.CreateFDiv(ops[0], ops[1]);
660 default:
661 assert(0);
662 }
663 case ir_binop_mod:
664 switch(ir->operands[0]->type->base_type)
665 {
666 case GLSL_TYPE_BOOL:
667 case GLSL_TYPE_UINT:
668 return bld.CreateURem(ops[0], ops[1]);
669 case GLSL_TYPE_INT:
670 return bld.CreateSRem(ops[0], ops[1]);
671 case GLSL_TYPE_FLOAT:
672 return bld.CreateFRem(ops[0], ops[1]);
673 default:
674 assert(0);
675 }
676 case ir_binop_less:
677 switch(ir->operands[0]->type->base_type)
678 {
679 case GLSL_TYPE_BOOL:
680 case GLSL_TYPE_UINT:
681 return bld.CreateICmpULT(ops[0], ops[1]);
682 case GLSL_TYPE_INT:
683 return bld.CreateICmpSLT(ops[0], ops[1]);
684 case GLSL_TYPE_FLOAT:
685 return bld.CreateFCmpOLT(ops[0], ops[1]);
686 default:
687 assert(0);
688 }
689 case ir_binop_greater:
690 switch(ir->operands[0]->type->base_type)
691 {
692 case GLSL_TYPE_BOOL:
693 case GLSL_TYPE_UINT:
694 return bld.CreateICmpUGT(ops[0], ops[1]);
695 case GLSL_TYPE_INT:
696 return bld.CreateICmpSGT(ops[0], ops[1]);
697 case GLSL_TYPE_FLOAT:
698 return bld.CreateFCmpOGT(ops[0], ops[1]);
699 default:
700 assert(0);
701 }
702 case ir_binop_lequal:
703 switch(ir->operands[0]->type->base_type)
704 {
705 case GLSL_TYPE_BOOL:
706 case GLSL_TYPE_UINT:
707 return bld.CreateICmpULE(ops[0], ops[1]);
708 case GLSL_TYPE_INT:
709 return bld.CreateICmpSLE(ops[0], ops[1]);
710 case GLSL_TYPE_FLOAT:
711 return bld.CreateFCmpOLE(ops[0], ops[1]);
712 default:
713 assert(0);
714 }
715 case ir_binop_gequal:
716 switch(ir->operands[0]->type->base_type)
717 {
718 case GLSL_TYPE_BOOL:
719 case GLSL_TYPE_UINT:
720 return bld.CreateICmpUGE(ops[0], ops[1]);
721 case GLSL_TYPE_INT:
722 return bld.CreateICmpSGE(ops[0], ops[1]);
723 case GLSL_TYPE_FLOAT:
724 return bld.CreateFCmpOGE(ops[0], ops[1]);
725 default:
726 assert(0);
727 }
728 case ir_binop_equal: // fall through
729 case ir_binop_all_equal: // TODO: check op same as ir_binop_equal
730 switch (ir->operands[0]->type->base_type) {
731 case GLSL_TYPE_BOOL:
732 case GLSL_TYPE_UINT:
733 case GLSL_TYPE_INT:
734 return bld.CreateICmpEQ(ops[0], ops[1]);
735 case GLSL_TYPE_FLOAT:
736 return bld.CreateFCmpOEQ(ops[0], ops[1]);
737 default:
738 assert(0);
739 }
740 case ir_binop_nequal:
741 switch(ir->operands[0]->type->base_type)
742 {
743 case GLSL_TYPE_BOOL:
744 case GLSL_TYPE_UINT:
745 case GLSL_TYPE_INT:
746 return bld.CreateICmpNE(ops[0], ops[1]);
747 case GLSL_TYPE_FLOAT:
748 return bld.CreateFCmpONE(ops[0], ops[1]);
749 default:
750 assert(0);
751 }
752 case ir_binop_logic_xor:
753 assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
754 return bld.CreateICmpNE(ops[0], ops[1]);
755 case ir_binop_logic_or:
756 assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
757 return bld.CreateOr(ops[0], ops[1]);
758 case ir_binop_logic_and:
759 assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
760 return bld.CreateAnd(ops[0], ops[1]);
761 case ir_binop_dot:
762 return create_dot_product(ops[0], ops[1], ir->operands[0]->type->base_type, ir->operands[0]->type->vector_elements);
763 // case ir_binop_cross: this op does not exist in ir.h
764 // assert(ir->operands[0]->type->vector_elements == 3);
765 // switch(ir->operands[0]->type->base_type)
766 // {
767 // case GLSL_TYPE_UINT:
768 // case GLSL_TYPE_INT:
769 // return bld.CreateSub(
770 // bld.CreateMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"),
771 // bld.CreateMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"),
772 // "cross.sub");
773 // case GLSL_TYPE_FLOAT:
774 // return bld.CreateFSub(
775 // bld.CreateFMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"),
776 // bld.CreateFMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"),
777 // "cross.sub");
778 // default:
779 // assert(0);
780 // }
781 case ir_unop_sqrt:
782 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
783 return llvm_intrinsic_unop(ir->operation, ops[0]);
784 case ir_unop_rsq:
785 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
786 return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), llvm_intrinsic_unop(ir_unop_sqrt, ops[0]), "rsqrt.rcp");
787 case ir_unop_i2f:
788 return bld.CreateSIToFP(ops[0], llvm_type(ir->type));
789 case ir_unop_u2f:
790 case ir_unop_b2f:
791 return bld.CreateUIToFP(ops[0], llvm_type(ir->type));
792 case ir_unop_b2i:
793 return bld.CreateZExt(ops[0], llvm_type(ir->type));
794 case ir_unop_f2i:
795 return bld.CreateFPToSI(ops[0], llvm_type(ir->type));
796 case ir_unop_f2b:
797 return bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0));
798 case ir_unop_i2b:
799 return bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0));
800 case ir_unop_trunc:
801 {
802 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
803 return ops[0];
804 glsl_type int_type = *ir->operands[0]->type;
805 int_type.base_type = GLSL_TYPE_INT;
806 return bld.CreateSIToFP(bld.CreateFPToSI(ops[0], llvm_type(&int_type), "trunc.fptosi"),ops[0]->getType(), "trunc.sitofp");
807 }
808 case ir_unop_floor:
809 {
810 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
811 return ops[0];
812 llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
813 return bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one));
814 }
815 case ir_unop_ceil:
816 {
817 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
818 return ops[0];
819 llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
820 return bld.CreateFAdd(bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one)), one);
821 }
822 case ir_unop_fract:
823 {
824 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
825 return llvm_imm(ops[0]->getType(), 0);
826 llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
827 return bld.CreateFRem(ops[0], one);
828 }
829 // TODO: NaNs might be wrong in min/max, not sure how to fix it
830 case ir_binop_min:
831 switch(ir->operands[0]->type->base_type)
832 {
833 case GLSL_TYPE_BOOL:
834 return bld.CreateAnd(ops[0], ops[1], "bmin");
835 case GLSL_TYPE_UINT:
836 return bld.CreateSelect(bld.CreateICmpULE(ops[0], ops[1], "umin.le"), ops[0], ops[1], "umin.select");
837 case GLSL_TYPE_INT:
838 return bld.CreateSelect(bld.CreateICmpSLE(ops[0], ops[1], "smin.le"), ops[0], ops[1], "smin.select");
839 case GLSL_TYPE_FLOAT:
840 return bld.CreateSelect(bld.CreateFCmpULE(ops[0], ops[1], "fmin.le"), ops[0], ops[1], "fmin.select");
841 default:
842 assert(0);
843 }
844 case ir_binop_max:
845 switch(ir->operands[0]->type->base_type)
846 {
847 case GLSL_TYPE_BOOL:
848 return bld.CreateOr(ops[0], ops[1], "bmax");
849 case GLSL_TYPE_UINT:
850 return bld.CreateSelect(bld.CreateICmpUGE(ops[0], ops[1], "umax.ge"), ops[0], ops[1], "umax.select");
851 case GLSL_TYPE_INT:
852 return bld.CreateSelect(bld.CreateICmpSGE(ops[0], ops[1], "smax.ge"), ops[0], ops[1], "smax.select");
853 case GLSL_TYPE_FLOAT:
854 return bld.CreateSelect(bld.CreateFCmpUGE(ops[0], ops[1], "fmax.ge"), ops[0], ops[1], "fmax.select");
855 default:
856 assert(0);
857 }
858 case ir_binop_pow:
859 assert(GLSL_TYPE_FLOAT == ir->operands[0]->type->base_type);
860 assert(GLSL_TYPE_FLOAT == ir->operands[1]->type->base_type);
861 return llvm_intrinsic_binop(ir_binop_pow, ops[0], ops[1]);
862 case ir_unop_bit_not:
863 return bld.CreateNot(ops[0]);
864 case ir_binop_bit_and:
865 return bld.CreateAnd(ops[0], ops[1]);
866 case ir_binop_bit_xor:
867 return bld.CreateXor(ops[0], ops[1]);
868 case ir_binop_bit_or:
869 return bld.CreateOr(ops[0], ops[1]);
870 case ir_binop_lshift:
871 switch(ir->operands[0]->type->base_type)
872 {
873 case GLSL_TYPE_BOOL:
874 case GLSL_TYPE_UINT:
875 case GLSL_TYPE_INT:
876 return bld.CreateLShr(ops[0], ops[1]);
877 default:
878 assert(0);
879 }
880 case ir_binop_rshift:
881 switch(ir->operands[0]->type->base_type)
882 {
883 case GLSL_TYPE_BOOL:
884 case GLSL_TYPE_UINT:
885 return bld.CreateLShr(ops[0], ops[1]);
886 case GLSL_TYPE_INT:
887 return bld.CreateAShr(ops[0], ops[1]);
888 default:
889 assert(0);
890 return 0;
891 }
892 default:
893 printf("ir->operation=%d \n", ir->operation);
894 assert(0);
895 return 0;
896 }
897 }
898
visit(class ir_expression * ir)899 virtual void visit(class ir_expression * ir)
900 {
901 result = llvm_expression(ir);
902 }
903
visit(class ir_dereference_array * ir)904 virtual void visit(class ir_dereference_array *ir)
905 {
906 result = bld.CreateLoad(llvm_pointer(ir));
907 }
908
visit(class ir_dereference_record * ir)909 virtual void visit(class ir_dereference_record *ir)
910 {
911 result = bld.CreateLoad(llvm_pointer(ir));
912 }
913
visit(class ir_dereference_variable * ir)914 virtual void visit(class ir_dereference_variable *ir)
915 {
916 result = bld.CreateLoad(llvm_pointer(ir), ir->variable_referenced()->name);
917 }
918
visit(class ir_texture * ir)919 virtual void visit(class ir_texture * ir)
920 {
921 llvm::Value * coordinate = llvm_value(ir->coordinate);
922 if (ir->projector)
923 {
924 llvm::Value * proj = llvm_value(ir->projector);
925 unsigned width = ((llvm::VectorType*)coordinate->getType())->getNumElements();
926 llvm::Value * div = llvm::Constant::getNullValue(coordinate->getType());
927 for (unsigned i = 0; i < width; i++)
928 div = bld.CreateInsertElement(div, proj, bld.getInt32(i), "texProjDup");
929 coordinate = bld.CreateFDiv(coordinate, div, "texProj");
930 }
931
932 ir_variable * sampler = NULL;
933 if(ir_dereference_variable* deref = ir->sampler->as_dereference_variable())
934 sampler = deref->variable_referenced();
935 else if(ir_dereference_array* deref = ir->sampler->as_dereference_array())
936 {
937 assert(0); // not implemented
938 return;
939 deref->array_index;
940 deref->array;
941 }
942 else if(ir->sampler->as_dereference())
943 {
944 assert(0); // not implemented
945 ir_dereference_record* deref = (ir_dereference_record*)ir->sampler;
946 int idx = deref->record->type->field_index(deref->field);
947 assert(idx >= 0);
948 }
949 else
950 assert(0);
951
952 assert(sampler->location >= 0 && sampler->location < 64); // TODO: proper limit
953
954 // ESSL texture LOD is only for 2D texture in vert shader, and it's explicit
955 // bias used only in frag shader, and added to computed LOD
956 assert(ir_tex == ir->op);
957
958 assert(GLSL_TYPE_FLOAT == sampler->type->sampler_type);
959 printf("sampler '%s' location=%d dim=%d type=%d proj=%d lod=%d \n", sampler->name, sampler->location,
960 sampler->type->sampler_dimensionality, sampler->type->sampler_type,
961 ir->projector ? 1 : 0, ir->lod_info.lod ? 1 : 0);
962 if (GLSL_SAMPLER_DIM_CUBE == sampler->type->sampler_dimensionality)
963 result = texCube(bld, coordinate, sampler->location, gglCtx);
964 else if (GLSL_SAMPLER_DIM_2D == sampler->type->sampler_dimensionality)
965 result = tex2D(bld, coordinate, sampler->location, gglCtx);
966 else
967 assert(0);
968 }
969
visit(class ir_discard * ir)970 virtual void visit(class ir_discard * ir)
971 {
972 llvm::BasicBlock* discard = llvm::BasicBlock::Create(ctx, "discard", fun);
973 llvm::BasicBlock* after;
974 if(ir->condition)
975 {
976 after = llvm::BasicBlock::Create(ctx, "discard.survived", fun);
977 bld.CreateCondBr(llvm_value(ir->condition), discard, after);
978 }
979 else
980 {
981 after = llvm::BasicBlock::Create(ctx, "dead_code.discard", fun);
982 bld.CreateBr(discard);
983 }
984
985 bld.SetInsertPoint(discard);
986
987 // FIXME: According to the LLVM mailing list, UnwindInst should not
988 // be used by the frontend since LLVM 3.0, and 'CreateUnwind'
989 // method has been removed from the IRBuilder. Here's the
990 // temporary workaround. But it would be better to remove
991 // this in the future.
992 //
993 // A solution after LLVM 3.0: To add a global boolean in the shader to
994 // store whether it was discarded or not and just continue on normally,
995 // and handle the discard outside the shader, in the scanline function.
996 // The discard instruction is not used frequently, so it should be okay
997 // performance wise.
998 //new llvm::UnwindInst(ctx, discard); /// Deprecated
999
1000 bb = after;
1001 bld.SetInsertPoint(bb);
1002 }
1003
visit(class ir_loop_jump * ir)1004 virtual void visit(class ir_loop_jump *ir)
1005 {
1006 llvm::BasicBlock* target;
1007 if(ir->mode == ir_loop_jump::jump_continue)
1008 target = loop.first;
1009 else if(ir->mode == ir_loop_jump::jump_break)
1010 target = loop.second;
1011 assert(target);
1012
1013 bld.CreateBr(target);
1014
1015 bb = llvm::BasicBlock::Create(ctx, "dead_code.jump", fun);
1016 bld.SetInsertPoint(bb);
1017 }
1018
visit(class ir_loop * ir)1019 virtual void visit(class ir_loop * ir)
1020 {
1021 llvm::BasicBlock* body = llvm::BasicBlock::Create(ctx, "loop", fun);
1022 llvm::BasicBlock* header = body;
1023 llvm::BasicBlock* after = llvm::BasicBlock::Create(ctx, "loop.after", fun);
1024 llvm::Value* ctr;
1025
1026 if(ir->counter)
1027 {
1028 ctr = llvm_variable(ir->counter);
1029 if(ir->from)
1030 bld.CreateStore(llvm_value(ir->from), ctr);
1031 if(ir->to)
1032 header = llvm::BasicBlock::Create(ctx, "loop.header", fun);
1033 }
1034
1035 bld.CreateBr(header);
1036
1037 if(ir->counter && ir->to)
1038 {
1039 bld.SetInsertPoint(header);
1040 llvm::Value* cond;
1041 llvm::Value* load = bld.CreateLoad(ctr);
1042 llvm::Value* to = llvm_value(ir->to);
1043 switch(ir->counter->type->base_type)
1044 {
1045 case GLSL_TYPE_BOOL:
1046 case GLSL_TYPE_UINT:
1047 cond = bld.CreateICmpULT(load, to);
1048 break;
1049 case GLSL_TYPE_INT:
1050 cond = bld.CreateICmpSLT(load, to);
1051 break;
1052 case GLSL_TYPE_FLOAT:
1053 cond = bld.CreateFCmpOLT(load, to);
1054 break;
1055 }
1056 bld.CreateCondBr(cond, body, after);
1057 }
1058
1059 bld.SetInsertPoint(body);
1060
1061 std::pair<llvm::BasicBlock*, llvm::BasicBlock*> saved_loop = loop;
1062 loop = std::make_pair(header, after);
1063 visit_exec_list(&ir->body_instructions, this);
1064 loop = saved_loop;
1065
1066 if(ir->counter && ir->increment)
1067 {
1068 switch(ir->counter->type->base_type)
1069 {
1070 case GLSL_TYPE_BOOL:
1071 case GLSL_TYPE_UINT:
1072 case GLSL_TYPE_INT:
1073 bld.CreateStore(bld.CreateAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr);
1074 break;
1075 case GLSL_TYPE_FLOAT:
1076 bld.CreateStore(bld.CreateFAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr);
1077 break;
1078 }
1079 }
1080 bld.CreateBr(header);
1081
1082 bb = after;
1083 bld.SetInsertPoint(bb);
1084 }
1085
visit(class ir_if * ir)1086 virtual void visit(class ir_if *ir)
1087 {
1088 llvm::BasicBlock* bbt = llvm::BasicBlock::Create(ctx, "if", fun);
1089 llvm::BasicBlock* bbf = llvm::BasicBlock::Create(ctx, "else", fun);
1090 llvm::BasicBlock* bbe = llvm::BasicBlock::Create(ctx, "endif", fun);
1091 bld.CreateCondBr(llvm_value(ir->condition), bbt, bbf);
1092
1093 bld.SetInsertPoint(bbt);
1094 visit_exec_list(&ir->then_instructions, this);
1095 bld.CreateBr(bbe);
1096
1097 bld.SetInsertPoint(bbf);
1098 visit_exec_list(&ir->else_instructions, this);
1099 bld.CreateBr(bbe);
1100
1101 bb = bbe;
1102 bld.SetInsertPoint(bb);
1103 }
1104
visit(class ir_return * ir)1105 virtual void visit(class ir_return * ir)
1106 {
1107 if(!ir->value)
1108 bld.CreateRetVoid();
1109 else
1110 bld.CreateRet(llvm_value(ir->value));
1111
1112 bb = llvm::BasicBlock::Create(ctx, "dead_code.return", fun);
1113 bld.SetInsertPoint(bb);
1114 }
1115
visit(class ir_call * ir)1116 virtual void visit(class ir_call * ir)
1117 {
1118 std::vector<llvm::Value*> args;
1119
1120 foreach_iter(exec_list_iterator, iter, *ir)
1121 {
1122 ir_rvalue *arg = (ir_constant *)iter.get();
1123 args.push_back(llvm_value(arg));
1124 }
1125
1126 result = bld.CreateCall(llvm_function(ir->get_callee()), llvm::ArrayRef<llvm::Value*>(args));
1127
1128 llvm::AttrListPtr attr;
1129 ((llvm::CallInst*)result)->setAttributes(attr);
1130 }
1131
visit(class ir_constant * ir)1132 virtual void visit(class ir_constant * ir)
1133 {
1134 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1135 std::vector<llvm::Constant*> fields;
1136 foreach_iter(exec_list_iterator, iter, ir->components) {
1137 ir_constant *field = (ir_constant *)iter.get();
1138 fields.push_back(llvm_constant(field));
1139 }
1140 result = llvm::ConstantStruct::get((llvm::StructType*)llvm_type(ir->type), fields);
1141 }
1142 else if (ir->type->base_type == GLSL_TYPE_ARRAY) {
1143 std::vector<llvm::Constant*> elems;
1144 for (unsigned i = 0; i < ir->type->length; i++)
1145 elems.push_back(llvm_constant(ir->array_elements[i]));
1146 result = llvm::ConstantArray::get((llvm::ArrayType*)llvm_type(ir->type), elems);
1147 }
1148 else
1149 {
1150 llvm::Type* base_type = llvm_base_type(ir->type->base_type);
1151 llvm::Type* vec_type = llvm_vec_type(ir->type);
1152 llvm::Type* type = llvm_type(ir->type);
1153
1154 std::vector<llvm::Constant*> vecs;
1155 unsigned idx = 0;
1156 for (unsigned i = 0; i < ir->type->matrix_columns; ++i) {
1157 std::vector<llvm::Constant*> elems;
1158 for (unsigned j = 0; j < ir->type->vector_elements; ++j) {
1159 llvm::Constant* elem;
1160 switch(ir->type->base_type)
1161 {
1162 case GLSL_TYPE_FLOAT:
1163 elem = llvm::ConstantFP::get(base_type, ir->value.f[idx]);
1164 break;
1165 case GLSL_TYPE_UINT:
1166 elem = llvm::ConstantInt::get(base_type, ir->value.u[idx]);
1167 break;
1168 case GLSL_TYPE_INT:
1169 elem = llvm::ConstantInt::get(base_type, ir->value.i[idx]);
1170 break;
1171 case GLSL_TYPE_BOOL:
1172 elem = llvm::ConstantInt::get(base_type, ir->value.b[idx]);
1173 break;
1174 }
1175 elems.push_back(elem);
1176 ++idx;
1177 }
1178
1179 llvm::Constant* vec;
1180 if(ir->type->vector_elements > 1) {
1181 llvm::ArrayRef<llvm::Constant*> ConstantArray(elems);
1182 vec = llvm::ConstantVector::get(ConstantArray);
1183 } else {
1184 vec = elems[0];
1185 }
1186 vecs.push_back(vec);
1187 }
1188
1189 if(ir->type->matrix_columns > 1)
1190 result = llvm::ConstantArray::get((llvm::ArrayType*)type, vecs);
1191 else
1192 result = vecs[0];
1193 }
1194 }
1195
llvm_shuffle(llvm::Value * val,int * shuffle_mask,unsigned res_width,const llvm::Twine & name="")1196 llvm::Value* llvm_shuffle(llvm::Value* val, int* shuffle_mask, unsigned res_width, const llvm::Twine &name = "")
1197 {
1198 llvm::Type* elem_type = val->getType();
1199 llvm::Type* res_type = elem_type;;
1200 unsigned val_width = 1;
1201 if(val->getType()->isVectorTy())
1202 {
1203 val_width = ((llvm::VectorType*)val->getType())->getNumElements();
1204 elem_type = ((llvm::VectorType*)val->getType())->getElementType();
1205 }
1206 if(res_width > 1)
1207 res_type = llvm::VectorType::get(elem_type, res_width);
1208
1209 llvm::Constant* shuffle_mask_values[4];
1210 assert(res_width <= 4);
1211 bool any_def = false;
1212 for(unsigned i = 0; i < res_width; ++i)
1213 {
1214 if(shuffle_mask[i] < 0)
1215 shuffle_mask_values[i] = llvm::UndefValue::get(llvm::Type::getInt32Ty(ctx));
1216 else
1217 {
1218 any_def = true;
1219 shuffle_mask_values[i] = llvm_int(shuffle_mask[i]);
1220 }
1221 }
1222
1223 llvm::Value* undef = llvm::UndefValue::get(res_type);
1224 if(!any_def)
1225 return undef;
1226
1227 if(val_width > 1)
1228 {
1229 if(res_width > 1)
1230 {
1231 if(val_width == res_width)
1232 {
1233 bool nontrivial = false;
1234 for(unsigned i = 0; i < val_width; ++i)
1235 {
1236 if(shuffle_mask[i] != (int)i)
1237 nontrivial = true;
1238 }
1239 if(!nontrivial)
1240 return val;
1241 }
1242
1243 return bld.CreateShuffleVector(val, llvm::UndefValue::get(val->getType()), llvm::ConstantVector::get(pack(shuffle_mask_values, res_width)), name);
1244 }
1245 else
1246 return bld.CreateExtractElement(val, llvm_int(shuffle_mask[0]), name);
1247 }
1248 else
1249 {
1250 if(res_width > 1)
1251 {
1252 llvm::Value* tmp = undef;
1253 for(unsigned i = 0; i < res_width; ++i)
1254 {
1255 if(shuffle_mask[i] >= 0)
1256 tmp = bld.CreateInsertElement(tmp, val, llvm_int(i), name);
1257 }
1258 return tmp;
1259 }
1260 else if(shuffle_mask[0] >= 0)
1261 return val;
1262 else
1263 return undef;
1264 }
1265 }
1266
1267
visit(class ir_swizzle * swz)1268 virtual void visit(class ir_swizzle * swz)
1269 {
1270 llvm::Value* val = llvm_value(swz->val);
1271 int mask[4] = {swz->mask.x, swz->mask.y, swz->mask.z, swz->mask.w};
1272 result = llvm_shuffle(val, mask, swz->mask.num_components, "swizzle");
1273 }
1274
visit(class ir_assignment * ir)1275 virtual void visit(class ir_assignment * ir)
1276 {
1277 llvm::Value* lhs = llvm_pointer(ir->lhs);
1278 llvm::Value* rhs = llvm_value(ir->rhs);
1279 unsigned width = ir->lhs->type->vector_elements;
1280 unsigned mask = (1 << width) - 1;
1281 assert(rhs);
1282
1283 // TODO: masking for matrix assignment
1284 if (ir->rhs->type->is_matrix()) {
1285 bld.CreateStore(rhs, lhs, "mat_str");
1286 return;
1287 }
1288
1289 if (!(ir->write_mask & mask))
1290 return;
1291
1292 if (ir->rhs->type->vector_elements < width) {
1293 int expand_mask[4] = {-1, -1, -1, -1};
1294 for (unsigned i = 0; i < ir->lhs->type->vector_elements; ++i)
1295 expand_mask[i] = i;
1296 // printf("ve: %u w %u issw: %i\n", ir->rhs->type->vector_elements, width, !!ir->rhs->as_swizzle());
1297 rhs = llvm_shuffle(rhs, expand_mask, width, "assign.expand");
1298 }
1299
1300 if (width > 1 && (ir->write_mask & mask) != mask) {
1301 llvm::Constant* blend_mask[4];
1302 // refer to ir.h: ir_assignment::write_mask
1303 // A partially-set write mask means that each enabled channel gets
1304 // the value from a consecutive channel of the rhs.
1305 unsigned rhsChannel = 0;
1306 for (unsigned i = 0; i < width; ++i) {
1307 if (ir->write_mask & (1 << i))
1308 blend_mask[i] = llvm_int(width + rhsChannel++);
1309 else
1310 blend_mask[i] = llvm_int(i);
1311 }
1312 rhs = bld.CreateShuffleVector(bld.CreateLoad(lhs), rhs, llvm::ConstantVector::get(pack(blend_mask, width)), "assign.writemask");
1313 }
1314
1315 if(ir->condition)
1316 rhs = bld.CreateSelect(llvm_value(ir->condition), rhs, bld.CreateLoad(lhs), "assign.conditional");
1317
1318 bld.CreateStore(rhs, lhs);
1319 }
1320
visit(class ir_variable * var)1321 virtual void visit(class ir_variable * var)
1322 {
1323 llvm_variable(var);
1324 }
1325
visit(ir_function_signature * sig)1326 virtual void visit(ir_function_signature *sig)
1327 {
1328 if(!sig->is_defined)
1329 return;
1330
1331 assert(!fun);
1332 fun = llvm_function(sig);
1333
1334 bb = llvm::BasicBlock::Create(ctx, "entry", fun);
1335 bld.SetInsertPoint(bb);
1336
1337 llvm::Function::arg_iterator ai = fun->arg_begin();
1338 if (!strcmp("main",sig->function_name()))
1339 {
1340 assert(3 == fun->arg_size());
1341 bld.CreateStore(ai, inputsPtr);
1342 inputs = ai;
1343 ai++;
1344 bld.CreateStore(ai, outputsPtr);
1345 outputs = ai;
1346 ai++;
1347 bld.CreateStore(ai, constantsPtr);
1348 constants = ai;
1349 ai++;
1350 }
1351 else
1352 {
1353 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1354 ir_variable* arg = (ir_variable*)iter.get();
1355 ai->setName(arg->name);
1356 bld.CreateStore(ai, llvm_variable(arg));
1357 ++ai;
1358 }
1359 inputs = bld.CreateLoad(inputsPtr);
1360 outputs = bld.CreateLoad(outputsPtr);
1361 constants = bld.CreateLoad(constantsPtr);
1362 }
1363 inputs->setName("gl_inputs");
1364 outputs->setName("gl_outputs");
1365 constants->setName("gl_constants");
1366
1367
1368
1369 foreach_iter(exec_list_iterator, iter, sig->body) {
1370 ir_instruction *ir = (ir_instruction *)iter.get();
1371
1372 ir->accept(this);
1373 }
1374
1375 if(fun->getReturnType()->isVoidTy())
1376 bld.CreateRetVoid();
1377 else
1378 bld.CreateRet(llvm::UndefValue::get(fun->getReturnType()));
1379
1380 bb = NULL;
1381 fun = NULL;
1382 }
1383
visit(class ir_function * funs)1384 virtual void visit(class ir_function * funs)
1385 {
1386 foreach_iter(exec_list_iterator, iter, *funs)
1387 {
1388 ir_function_signature* sig = (ir_function_signature*)iter.get();
1389 sig->accept(this);
1390 }
1391 }
1392 };
1393
1394 struct llvm::Module *
glsl_ir_to_llvm_module(struct exec_list * ir,llvm::Module * mod,const struct GGLState * gglCtx,const char * shaderSuffix)1395 glsl_ir_to_llvm_module(struct exec_list *ir, llvm::Module * mod,
1396 const struct GGLState * gglCtx, const char * shaderSuffix)
1397 {
1398 ir_to_llvm_visitor v(mod, gglCtx, shaderSuffix);
1399
1400 visit_exec_list(ir, &v);
1401
1402 // mod->dump();
1403 if(llvm::verifyModule(*mod, llvm::PrintMessageAction, 0))
1404 {
1405 puts("**\n module verification failed **\n");
1406 mod->dump();
1407 assert(0);
1408 return NULL;
1409 }
1410
1411 return mod;
1412 //v.ir_to_llvm_emit_op1(NULL, OPCODE_END, ir_to_llvm_undef_dst, ir_to_llvm_undef);
1413 }
1414