• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
4  * Copyright © 2010 Intel Corporation
5  * Copyright © 2010 Luca Barbieri
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24  * DEALINGS IN THE SOFTWARE.
25  */
26 
27 /**
28  * \file ir_to_llvm.cpp
29  *
30  * Translates the IR to LLVM
31  */
32 
33 /* this tends to get set as part of LLVM_CFLAGS, but we definitely want asserts */
34 #ifdef NDEBUG
35 #undef NDEBUG
36 #endif
37 
38 #include "llvm/ADT/ArrayRef.h"
39 #include "llvm/DerivedTypes.h"
40 #include "llvm/LLVMContext.h"
41 #include "llvm/Module.h"
42 #include "llvm/Analysis/Verifier.h"
43 #include "llvm/Support/IRBuilder.h"
44 //#include "llvm/Intrinsics.h"
45 
46 #include <vector>
47 #include <stdio.h>
48 #include <map>
49 /*
50 #ifdef _MSC_VER
51 #include <unordered_map>
52 #else
53 #include <tr1/unordered_map>
54 #endif
55 // use C++0x/Microsoft convention
56 namespace std
57 {
58 using namespace tr1;
59 }
60 //*/
61 
62 #include "ir.h"
63 #include "ir_visitor.h"
64 #include "glsl_types.h"
65 #include "src/mesa/main/mtypes.h"
66 
67 // Helper function to convert array to llvm::ArrayRef
68 template <typename T, size_t N>
pack(T const (& array)[N])69 static inline llvm::ArrayRef<T> pack(T const (&array)[N]) {
70    return llvm::ArrayRef<T>(array);
71 }
72 
73 // Helper function to convert pointer + size to llvm::ArrayRef
74 template <typename T>
pack(T const * ptr,size_t n)75 static inline llvm::ArrayRef<T> pack(T const *ptr, size_t n) {
76    return llvm::ArrayRef<T>(ptr, n);
77 }
78 
79 struct GGLState;
80 
81 llvm::Value * tex2D(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler,
82                      const GGLState * gglCtx);
83 llvm::Value * texCube(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler,
84                      const GGLState * gglCtx);
85 
86 class ir_to_llvm_visitor : public ir_visitor {
87    ir_to_llvm_visitor();
88 public:
89 
90 
91    llvm::LLVMContext& ctx;
92    llvm::Module* mod;
93    llvm::Function* fun;
94    // could easily support more loops, but GLSL doesn't support multiloop break/continue
95    std::pair<llvm::BasicBlock*, llvm::BasicBlock*> loop;
96    llvm::BasicBlock* bb;
97    llvm::Value* result;
98    llvm::IRBuilder<> bld;
99 
100    const GGLState * gglCtx;
101    const char * shaderSuffix;
102    llvm::Value * inputsPtr, * outputsPtr, * constantsPtr; // internal globals to store inputs/outputs/constants pointers
103    llvm::Value * inputs, * outputs, * constants;
104 
// Builds a visitor that emits into module `p_mod`.
//
// p_mod   module receiving all generated functions and globals; its
//         LLVMContext is used for every type and constant created here
// GGLCtx  stored in gglCtx, not dereferenced by the constructor
// suffix  stored in shaderSuffix; appended to function names by llvm_function
//
// `result` is not initialised here; llvm_value() resets it before each visit.
ir_to_llvm_visitor(llvm::Module* p_mod, const GGLState * GGLCtx, const char * suffix)
   : ctx(p_mod->getContext()),
     mod(p_mod),
     fun(0),
     loop(static_cast<llvm::BasicBlock*>(0), static_cast<llvm::BasicBlock*>(0)),
     bb(0),
     bld(ctx),
     gglCtx(GGLCtx),
     shaderSuffix(suffix),
     inputsPtr(NULL), outputsPtr(NULL), constantsPtr(NULL),
     inputs(NULL), outputs(NULL), constants(NULL)
{
   // <4 x float>* in address space 0, and its null constant.
   llvm::Type * const vec4Ty = llvm::VectorType::get(bld.getFloatTy(), 4);
   llvm::PointerType * const vec4PtrTy = llvm::PointerType::get(vec4Ty, 0);
   llvm::Constant * const nullVec4Ptr = llvm::Constant::getNullValue(vec4PtrTy);
   const llvm::GlobalValue::LinkageTypes internal = llvm::GlobalValue::InternalLinkage;

   // The input, output and constant "register" pointers are module-level
   // globals so that every LLVM function generated for the shader can reach
   // them, since the shader shares these registers across its functions.
   inputsPtr = new llvm::GlobalVariable(*mod, vec4PtrTy, false, internal,
                                        nullVec4Ptr, "gl_inputPtr");
   outputsPtr = new llvm::GlobalVariable(*mod, vec4PtrTy, false, internal,
                                         nullVec4Ptr, "gl_outputsPtr");
   constantsPtr = new llvm::GlobalVariable(*mod, vec4PtrTy, false, internal,
                                           nullVec4Ptr, "gl_constantsPtr");
}
125 
llvm_base_type(unsigned base_type)126    llvm::Type* llvm_base_type(unsigned base_type)
127    {
128       switch(base_type)
129       {
130       case GLSL_TYPE_VOID:
131          return llvm::Type::getVoidTy(ctx);
132       case GLSL_TYPE_UINT:
133       case GLSL_TYPE_INT:
134          return llvm::Type::getInt32Ty(ctx);
135       case GLSL_TYPE_FLOAT:
136          return llvm::Type::getFloatTy(ctx);
137       case GLSL_TYPE_BOOL:
138          return llvm::Type::getInt1Ty(ctx);
139       case GLSL_TYPE_SAMPLER:
140          return llvm::PointerType::getUnqual(llvm::Type::getVoidTy(ctx));
141       default:
142          assert(0);
143          return 0;
144       }
145    }
146 
llvm_vec_type(const glsl_type * type)147    llvm::Type* llvm_vec_type(const glsl_type* type)
148    {
149       if (type->is_array())
150          return llvm::ArrayType::get(llvm_type(type->fields.array), type->array_size());
151 
152       if (type->is_record())
153       {
154          std::vector<llvm::Type*> fields;
155          for (unsigned i = 0; i < type->length; i++)
156             fields.push_back(llvm_type(type->fields.structure[i].type));
157          return llvm::StructType::get(ctx, llvm::ArrayRef<llvm::Type*>(
158              fields));
159       }
160 
161       llvm::Type* base_type = llvm_base_type(type->base_type);
162       if (type->vector_elements <= 1) {
163          return base_type;
164       } else {
165          return llvm::VectorType::get(base_type, type->vector_elements);
166       }
167    }
168 
llvm_type(const glsl_type * type)169    llvm::Type* llvm_type(const glsl_type* type)
170    {
171       llvm::Type* vec_type = llvm_vec_type(type);
172       if (type->matrix_columns <= 1) {
173          return vec_type;
174       } else {
175          return llvm::ArrayType::get(vec_type, type->matrix_columns);
176       }
177    }
178 
179    typedef std::map<ir_variable*, llvm::Value*> llvm_variables_t;
180    //typedef std::unordered_map<ir_variable*, llvm::Value*> llvm_variables_t;
181    llvm_variables_t llvm_variables;
182 
llvm_variable(class ir_variable * var)183    llvm::Value* llvm_variable(class ir_variable* var)
184    {
185       llvm_variables_t::iterator vari = llvm_variables.find(var);
186       if (vari != llvm_variables.end()) {
187          return vari->second;
188       } else {
189          llvm::Type* type = llvm_type(var->type);
190 
191          llvm::Value* v = NULL;
192          if(fun) {
193             if (ir_var_in == var->mode)
194             {
195                assert(var->location >= 0);
196                v = bld.CreateConstGEP1_32(inputs, var->location);
197                v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
198             }
199             else if (ir_var_out == var->mode)
200             {
201                assert(var->location >= 0);
202                v = bld.CreateConstGEP1_32(outputs, var->location);
203                v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
204             }
205             else if (ir_var_uniform == var->mode)
206             {
207                assert(var->location >= 0);
208                v = bld.CreateConstGEP1_32(constants, var->location);
209                v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
210             }
211             else
212             {
213                if(bb == &fun->getEntryBlock())
214                   v = bld.CreateAlloca(type, 0, var->name);
215                else
216                   v = new llvm::AllocaInst(type, 0, var->name, fun->getEntryBlock().getTerminator());
217             }
218          } else {
219            // TODO: can anything global be non-constant in GLSL?; fix linkage
220             //printf("var '%s' mode=%d location=%d \n", var->name, var->mode, var->location);
221             switch(var->mode)
222             {
223                case ir_var_auto: // fall through
224                case ir_var_temporary:
225                {
226                   llvm::Constant * init = llvm::UndefValue::get(llvm_type(var->type));
227                   if(var->constant_value)
228                      init = llvm_constant(var->constant_value);
229                   v = new llvm::GlobalVariable(*mod, type, var->read_only, llvm::GlobalValue::InternalLinkage, init, var->name);
230                   break;
231                }
232                case ir_var_in: // fall through
233                case ir_var_out: // fall through
234                case ir_var_uniform: // fall through
235                   assert(var->location >= 0);
236                   return NULL; // variable outside of function means declaration
237                default:
238                   assert(0);
239             }
240 
241 //            llvm::Function::LinkageTypes linkage;
242 //            if(var->mode == ir_var_auto || var->mode == ir_var_temporary)
243 //               linkage = llvm::GlobalValue::InternalLinkage;
244 //            else
245 //               linkage = llvm::GlobalValue::ExternalLinkage;
246 //            llvm::Constant* init = 0;
247 //            if(var->constant_value)
248 //            {
249 //               init = llvm_constant(var->constant_value);
250 //               // this constants need to be external (ie. written to output)
251 //               if (llvm::GlobalValue::ExternalLinkage == linkage)
252 //                  linkage = llvm::GlobalValue::AvailableExternallyLinkage;
253 //            }
254 //            else if(linkage == llvm::GlobalValue::InternalLinkage)
255 //               init = llvm::UndefValue::get(llvm_type(var->type));
256 //            v = new llvm::GlobalVariable(*mod, type, var->read_only, linkage, init, var->name);
257          }
258          assert(v);
259          llvm_variables[var] = v;
260          return v;
261       }
262    }
263 
264    //typedef std::map<ir_function_signature*, llvm::Function*> llvm_functions_t;
265    //typedef std::unordered_map<ir_function_signature*, llvm::Function*> llvm_functions_t;
266    //llvm_functions_t llvm_functions;
267 
llvm_function(class ir_function_signature * sig)268    llvm::Function* llvm_function(class ir_function_signature* sig)
269    {
270       const char* name = sig->function_name();
271       char * functionName = (char *)malloc(strlen(name) + strlen(shaderSuffix) + 1);
272       strcpy(functionName, name);
273       strcat(functionName, shaderSuffix);
274       llvm::Function * function = mod->getFunction(functionName);
275       if (function)
276       {
277          free(functionName);
278          return function;
279       }
280       else
281       {
282          llvm::Function::LinkageTypes linkage;
283          std::vector<llvm::Type*> params;
284          foreach_iter(exec_list_iterator, iter, sig->parameters) {
285             ir_variable* arg = (ir_variable*)iter.get();
286             params.push_back(llvm_type(arg->type));
287          }
288 
289          if(!strcmp(name, "main") || !sig->is_defined)
290          {
291             linkage = llvm::Function::ExternalLinkage;
292             llvm::PointerType * vecPtrTy = llvm::PointerType::get(llvm::VectorType::get(bld.getFloatTy(), 4), 0);
293             assert(0 == params.size());
294             params.push_back(vecPtrTy); // inputs
295             params.push_back(vecPtrTy); // outputs
296             params.push_back(vecPtrTy); // constants
297          }
298          else {
299             linkage = llvm::Function::InternalLinkage;
300          }
301          llvm::FunctionType* ft = llvm::FunctionType::get(llvm_type(sig->return_type),
302                                                           llvm::ArrayRef<llvm::Type*>(params),
303                                                           false);
304          function = llvm::Function::Create(ft, linkage, functionName, mod);
305          free(functionName);
306          return function;
307       }
308    }
309 
llvm_value(class ir_instruction * ir)310    llvm::Value* llvm_value(class ir_instruction* ir)
311    {
312       result = 0;
313       ir->accept(this);
314       return result;
315    }
316 
llvm_constant(class ir_instruction * ir)317    llvm::Constant* llvm_constant(class ir_instruction* ir)
318    {
319       return (llvm::Constant *)llvm_value(ir);
320       //return &dynamic_cast<llvm::Constant&>(*llvm_value(ir));
321    }
322 
llvm_int(unsigned v)323    llvm::Constant* llvm_int(unsigned v)
324    {
325       return llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), v);
326    }
327 
llvm_pointer(class ir_rvalue * ir)328    llvm::Value* llvm_pointer(class ir_rvalue* ir)
329    {
330       if(ir_dereference_variable* deref = ir->as_dereference_variable())
331          return llvm_variable(deref->variable_referenced());
332       else if(ir_dereference_array* deref = ir->as_dereference_array())
333       {
334          llvm::Value* gep[2] = {llvm_int(0), llvm_value(deref->array_index)};
335          return bld.CreateInBoundsGEP(llvm_pointer(deref->array), gep);
336          }
337       else if(ir->as_dereference())
338       {
339          ir_dereference_record* deref = (ir_dereference_record*)ir;
340          int idx = deref->record->type->field_index(deref->field);
341          assert(idx >= 0);
342          return bld.CreateConstInBoundsGEP2_32(llvm_pointer(deref->record), 0, idx);
343       }
344       else
345       {
346          assert(0);
347          return 0;
348       }
349    }
350 
351 //   llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a)
352 //   {
353 //      llvm::Type* types[1] = {a->getType()};
354 //      return bld.CreateCall(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a);
355 //   }
356 //
357 //   llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a, llvm::Value* b)
358 //   {
359 //      llvm::Type* types[2] = {a->getType(), b->getType()};
360 //      /* only one type suffix is usually needed, so pass 1 here */
361 //      return bld.CreateCall2(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a, b);
362 //   }
363 
llvm_intrinsic_unop(ir_expression_operation op,llvm::Value * op0)364    llvm::Value* llvm_intrinsic_unop(ir_expression_operation op, llvm::Value * op0)
365    {
366       llvm::Type * floatType = llvm::Type::getFloatTy(ctx);
367       const char * name = NULL;
368       switch (op) {
369       case ir_unop_sin:
370          name = "sinf";
371          break;
372       case ir_unop_cos:
373          name = "cosf";
374          break;
375       default:
376          assert(0);
377       }
378 
379       llvm::Function * function = mod->getFunction(name);
380       if (!function) {
381          // predeclare the intrinsic
382          std::vector<llvm::Type*> args;
383          args.push_back(floatType);
384          llvm::FunctionType* type = llvm::FunctionType::get(floatType,
385                                                             llvm::ArrayRef<llvm::Type*>(args),
386                                                             false);
387          function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod);
388          function->setCallingConv(llvm::CallingConv::C);
389       }
390 
391       return bld.CreateCall(function, op0);
392    }
393 
llvm_intrinsic_binop(ir_expression_operation op,llvm::Value * op0,llvm::Value * op1)394    llvm::Value* llvm_intrinsic_binop(ir_expression_operation op, llvm::Value * op0, llvm::Value * op1)
395    {
396       llvm::Type * floatType = llvm::Type::getFloatTy(ctx);
397       const char * name = NULL;
398       switch (op) {
399       case ir_binop_pow:
400          name = "powf";
401          break;
402       default:
403          assert(0);
404       }
405 
406       llvm::Function * function = mod->getFunction(name);
407       if (!function) {
408          // predeclare the intrinsic
409          std::vector<llvm::Type*> args;
410          args.push_back(floatType);
411          args.push_back(floatType);
412          llvm::FunctionType* type = llvm::FunctionType::get(floatType,
413                                                             llvm::ArrayRef<llvm::Type*>(args),
414                                                             false);
415          function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod);
416          function->setCallingConv(llvm::CallingConv::C);
417       }
418 
419       return bld.CreateCall2(function, op0, op1);
420    }
421 
llvm_imm(llvm::Type * type,double v)422    llvm::Constant* llvm_imm(llvm::Type* type, double v)
423    {
424       if(type->isVectorTy())
425       {
426          std::vector<llvm::Constant*> values;
427          values.push_back(llvm_imm(((llvm::VectorType*)type)->getElementType(), v));
428          for(unsigned i = 1; i < ((llvm::VectorType*)type)->getNumElements(); ++i)
429             values.push_back(values[0]);
430          return llvm::ConstantVector::get(values);
431       }
432       else if(type->isIntegerTy())
433          return llvm::ConstantInt::get(type, v);
434       else if(type->isFloatingPointTy())
435          return llvm::ConstantFP::get(type, v);
436       else
437       {
438          assert(0);
439          return 0;
440       }
441    }
442 
create_shuffle3(llvm::IRBuilder<> & bld,llvm::Value * v,unsigned a,unsigned b,unsigned c,const llvm::Twine & name="")443    static llvm::Value* create_shuffle3(llvm::IRBuilder<>& bld, llvm::Value* v, unsigned a, unsigned b, unsigned c, const llvm::Twine& name = "")
444    {
445       llvm::Type* int_ty = llvm::Type::getInt32Ty(v->getContext());
446       llvm::Constant* vals[3] = {llvm::ConstantInt::get(int_ty, a), llvm::ConstantInt::get(int_ty, b), llvm::ConstantInt::get(int_ty, c)};
447       return bld.CreateShuffleVector(v, llvm::UndefValue::get(v->getType()), llvm::ConstantVector::get(pack(vals)), name);
448    }
449 
create_select(unsigned width,llvm::Value * cond,llvm::Value * tru,llvm::Value * fal,const char * name="")450    llvm::Value* create_select(unsigned width, llvm::Value * cond, llvm::Value * tru, llvm::Value * fal, const char * name = "")
451    {
452       if (1 == width)
453          return bld.CreateSelect(cond, tru, fal, name);
454 
455       llvm::Type * vectorType = tru->getType();
456       llvm::Value * vector = llvm::Constant::getNullValue(vectorType);
457       for (unsigned int i = 0; i < width; i++) {
458          llvm::Value * c = bld.CreateExtractElement(cond, llvm_int(i));
459          llvm::Value * t = bld.CreateExtractElement(tru, llvm_int(i));
460          llvm::Value * f = bld.CreateExtractElement(fal, llvm_int(i));
461          llvm::Value * v = bld.CreateSelect(c, t, f, name);
462          vector = bld.CreateInsertElement(vector, v, llvm_int(i), "vslct");
463       }
464       return vector;
465    }
466 
create_dot_product(llvm::Value * ops0,llvm::Value * ops1,glsl_base_type type,unsigned width)467    llvm::Value* create_dot_product(llvm::Value* ops0, llvm::Value* ops1, glsl_base_type type, unsigned width)
468    {
469       llvm::Value* prod;
470       switch (type) {
471       case GLSL_TYPE_UINT:
472       case GLSL_TYPE_INT:
473          prod = bld.CreateMul(ops0, ops1, "dot.mul");
474          break;
475       case GLSL_TYPE_FLOAT:
476          prod = bld.CreateFMul(ops0, ops1, "dot.mul");
477          break;
478       default:
479          assert(0);
480       }
481 
482       if (width<= 1)
483          return prod;
484 
485       llvm::Value* sum = 0;
486       for (unsigned i = 0; i < width; ++i) {
487          llvm::Value* elem = bld.CreateExtractElement(prod, llvm_int(i), "dot.elem");
488          if (sum) {
489             if (type == GLSL_TYPE_FLOAT)
490                sum = bld.CreateFAdd(sum, elem, "dot.add");
491             else
492                sum = bld.CreateAdd(sum, elem, "dot.add");
493          }
494          else
495             sum = elem;
496       }
497       return sum;
498    }
499 
llvm_expression(ir_expression * ir)500    llvm::Value* llvm_expression(ir_expression* ir)
501    {
502       llvm::Value* ops[2];
503       for(unsigned i = 0; i < ir->get_num_operands(); ++i)
504          ops[i] = llvm_value(ir->operands[i]);
505 
506       if(ir->get_num_operands() == 2)
507       {
508          int vecidx = -1;
509          int scaidx = -1;
510          if(ir->operands[0]->type->vector_elements <= 1 && ir->operands[1]->type->vector_elements > 1)
511          {
512             scaidx = 0;
513             vecidx = 1;
514          }
515          else if(ir->operands[0]->type->vector_elements > 1 && ir->operands[1]->type->vector_elements <= 1)
516          {
517             scaidx = 1;
518             vecidx = 0;
519          }
520          else
521             assert(ir->operands[0]->type->vector_elements == ir->operands[1]->type->vector_elements);
522 
523          if(scaidx >= 0)
524          {
525             llvm::Value* vec;
526             vec = llvm::UndefValue::get(ops[vecidx]->getType());
527             for(unsigned i = 0; i < ir->operands[vecidx]->type->vector_elements; ++i)
528                vec = bld.CreateInsertElement(vec,  ops[scaidx], llvm_int(i), "sca2vec");
529             ops[scaidx] = vec;
530          }
531       }
532 
533       switch (ir->operation) {
534       case ir_unop_logic_not:
535          return bld.CreateNot(ops[0]);
536       case ir_unop_neg:
537          switch (ir->operands[0]->type->base_type) {
538          case GLSL_TYPE_UINT:
539          case GLSL_TYPE_BOOL:
540          case GLSL_TYPE_INT:
541             return bld.CreateNeg(ops[0]);
542          case GLSL_TYPE_FLOAT:
543             return bld.CreateFNeg(ops[0]);
544          default:
545             assert(0);
546          }
547       case ir_unop_abs:
548          switch (ir->operands[0]->type->base_type) {
549          case GLSL_TYPE_UINT:
550          case GLSL_TYPE_BOOL:
551             return ops[0];
552          case GLSL_TYPE_INT:
553             return create_select(ir->operands[0]->type->vector_elements,
554                                  bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "sabs.ge"),
555                                  ops[0], bld.CreateNeg(ops[0], "sabs.neg"), "sabs.select");
556          case GLSL_TYPE_FLOAT:
557             return create_select(ir->operands[0]->type->vector_elements,
558                                  bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fabs.ge"),
559                                  ops[0], bld.CreateFNeg(ops[0], "fabs.neg"), "fabs.select");
560          default:
561             assert(0);
562          }
563       case ir_unop_sign:
564          switch (ir->operands[0]->type->base_type) {
565          case GLSL_TYPE_BOOL:
566             return ops[0];
567          case GLSL_TYPE_UINT:
568             return bld.CreateZExt(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "usign.ne"), ops[0]->getType(), "usign.zext");
569          case GLSL_TYPE_INT:
570             return bld.CreateSelect(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ne"),
571                                     bld.CreateSelect(bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "sabs.selects"),
572                                     llvm_imm(ops[0]->getType(), 0), "sabs.select0");
573          case GLSL_TYPE_FLOAT:
574             return bld.CreateSelect(bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ne"),
575                                     bld.CreateSelect(bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "fabs.selects"),
576                                     llvm_imm(ops[0]->getType(), 0), "fabs.select0");
577          default:
578             assert(0);
579          }
580       case ir_unop_rcp:
581          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
582          return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), ops[0]);
583       case ir_unop_exp: // fall through
584       case ir_unop_exp2: // fall through
585       case ir_unop_log: // fall through
586       case ir_unop_log2: // fall through
587       case ir_unop_sin: // fall through
588       case ir_unop_cos:
589          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
590          return llvm_intrinsic_unop(ir->operation, ops[0]);
591          // TODO: implement these somehow
592       case ir_unop_dFdx:
593          assert(0);
594          //return llvm_intrinsic(llvm::Intrinsic::ddx, ops[0]);
595       case ir_unop_dFdy:
596          assert(0);
597          //return llvm_intrinsic(llvm::Intrinsic::ddy, ops[0]);
598       case ir_binop_add:
599          switch(ir->operands[0]->type->base_type)
600          {
601          case GLSL_TYPE_BOOL:
602          case GLSL_TYPE_UINT:
603          case GLSL_TYPE_INT:
604             return bld.CreateAdd(ops[0], ops[1]);
605          case GLSL_TYPE_FLOAT:
606             return bld.CreateFAdd(ops[0], ops[1]);
607          default:
608             assert(0);
609          }
610       case ir_binop_sub:
611          switch(ir->operands[0]->type->base_type)
612          {
613          case GLSL_TYPE_BOOL:
614          case GLSL_TYPE_UINT:
615          case GLSL_TYPE_INT:
616             return bld.CreateSub(ops[0], ops[1]);
617          case GLSL_TYPE_FLOAT:
618             return bld.CreateFSub(ops[0], ops[1]);
619          default:
620             assert(0);
621          }
622       case ir_binop_mul:
623          if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_vector())
624             assert(0);
625          else if (ir->operands[0]->type->is_vector() && ir->operands[1]->type->is_matrix()) {
626             assert(0); // matrix multiplication should have been lowered to vector ops
627 			llvm::VectorType * vectorType = llvm::VectorType::get(llvm_base_type(ir->operands[1]->type->base_type), ir->operands[1]->type->matrix_columns);
628             llvm::Value * vector = llvm::Constant::getNullValue(vectorType);
629             for (unsigned int i = 0; i < ir->operands[1]->type->matrix_columns; i++) {
630                llvm::Value * value = bld.CreateExtractValue(ops[1], i, "vec*mat_col");
631                value = create_dot_product(value, ops[0], ir->operands[1]->type->base_type, ir->operands[1]->type->vector_elements);
632                vector = bld.CreateInsertElement(vector, value, llvm_int(i), "vec*mat_res");
633             }
634             return vector;
635          }
636          else if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_matrix())
637             assert(0);
638 
639          switch (ir->operands[0]->type->base_type) {
640          case GLSL_TYPE_BOOL:
641             return bld.CreateAnd(ops[0], ops[1]);
642          case GLSL_TYPE_UINT:
643          case GLSL_TYPE_INT:
644             return bld.CreateMul(ops[0], ops[1]);
645          case GLSL_TYPE_FLOAT:
646             return bld.CreateFMul(ops[0], ops[1]);
647          default:
648             assert(0);
649          }
650          case ir_binop_div:
651          switch(ir->operands[0]->type->base_type)
652          {
653          case GLSL_TYPE_BOOL:
654          case GLSL_TYPE_UINT:
655             return bld.CreateUDiv(ops[0], ops[1]);
656          case GLSL_TYPE_INT:
657             return bld.CreateSDiv(ops[0], ops[1]);
658          case GLSL_TYPE_FLOAT:
659             return bld.CreateFDiv(ops[0], ops[1]);
660          default:
661             assert(0);
662          }
663       case ir_binop_mod:
664          switch(ir->operands[0]->type->base_type)
665          {
666          case GLSL_TYPE_BOOL:
667          case GLSL_TYPE_UINT:
668             return bld.CreateURem(ops[0], ops[1]);
669          case GLSL_TYPE_INT:
670             return bld.CreateSRem(ops[0], ops[1]);
671          case GLSL_TYPE_FLOAT:
672             return bld.CreateFRem(ops[0], ops[1]);
673          default:
674             assert(0);
675          }
676       case ir_binop_less:
677          switch(ir->operands[0]->type->base_type)
678          {
679          case GLSL_TYPE_BOOL:
680          case GLSL_TYPE_UINT:
681             return bld.CreateICmpULT(ops[0], ops[1]);
682          case GLSL_TYPE_INT:
683             return bld.CreateICmpSLT(ops[0], ops[1]);
684          case GLSL_TYPE_FLOAT:
685             return bld.CreateFCmpOLT(ops[0], ops[1]);
686          default:
687             assert(0);
688          }
689       case ir_binop_greater:
690          switch(ir->operands[0]->type->base_type)
691          {
692          case GLSL_TYPE_BOOL:
693          case GLSL_TYPE_UINT:
694             return bld.CreateICmpUGT(ops[0], ops[1]);
695          case GLSL_TYPE_INT:
696             return bld.CreateICmpSGT(ops[0], ops[1]);
697          case GLSL_TYPE_FLOAT:
698             return bld.CreateFCmpOGT(ops[0], ops[1]);
699          default:
700             assert(0);
701          }
702       case ir_binop_lequal:
703          switch(ir->operands[0]->type->base_type)
704          {
705          case GLSL_TYPE_BOOL:
706          case GLSL_TYPE_UINT:
707             return bld.CreateICmpULE(ops[0], ops[1]);
708          case GLSL_TYPE_INT:
709             return bld.CreateICmpSLE(ops[0], ops[1]);
710          case GLSL_TYPE_FLOAT:
711             return bld.CreateFCmpOLE(ops[0], ops[1]);
712          default:
713             assert(0);
714          }
715       case ir_binop_gequal:
716          switch(ir->operands[0]->type->base_type)
717          {
718          case GLSL_TYPE_BOOL:
719          case GLSL_TYPE_UINT:
720             return bld.CreateICmpUGE(ops[0], ops[1]);
721          case GLSL_TYPE_INT:
722             return bld.CreateICmpSGE(ops[0], ops[1]);
723          case GLSL_TYPE_FLOAT:
724             return bld.CreateFCmpOGE(ops[0], ops[1]);
725          default:
726             assert(0);
727          }
728       case ir_binop_equal: // fall through
729       case ir_binop_all_equal: // TODO: check op same as ir_binop_equal
730          switch (ir->operands[0]->type->base_type) {
731          case GLSL_TYPE_BOOL:
732          case GLSL_TYPE_UINT:
733          case GLSL_TYPE_INT:
734             return bld.CreateICmpEQ(ops[0], ops[1]);
735          case GLSL_TYPE_FLOAT:
736             return bld.CreateFCmpOEQ(ops[0], ops[1]);
737          default:
738             assert(0);
739          }
740       case ir_binop_nequal:
741          switch(ir->operands[0]->type->base_type)
742          {
743          case GLSL_TYPE_BOOL:
744          case GLSL_TYPE_UINT:
745          case GLSL_TYPE_INT:
746             return bld.CreateICmpNE(ops[0], ops[1]);
747          case GLSL_TYPE_FLOAT:
748             return bld.CreateFCmpONE(ops[0], ops[1]);
749          default:
750             assert(0);
751          }
752       case ir_binop_logic_xor:
753          assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
754          return bld.CreateICmpNE(ops[0], ops[1]);
755       case ir_binop_logic_or:
756          assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
757          return bld.CreateOr(ops[0], ops[1]);
758       case ir_binop_logic_and:
759          assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
760          return bld.CreateAnd(ops[0], ops[1]);
761       case ir_binop_dot:
762          return create_dot_product(ops[0], ops[1], ir->operands[0]->type->base_type, ir->operands[0]->type->vector_elements);
763 //      case ir_binop_cross: this op does not exist in ir.h
764 //         assert(ir->operands[0]->type->vector_elements == 3);
765 //         switch(ir->operands[0]->type->base_type)
766 //         {
767 //         case GLSL_TYPE_UINT:
768 //         case GLSL_TYPE_INT:
769 //            return bld.CreateSub(
770 //                  bld.CreateMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"),
771 //                  bld.CreateMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"),
772 //                  "cross.sub");
773 //         case GLSL_TYPE_FLOAT:
774 //            return bld.CreateFSub(
775 //                  bld.CreateFMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"),
776 //                  bld.CreateFMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"),
777 //                  "cross.sub");
778 //         default:
779 //            assert(0);
780 //         }
781       case ir_unop_sqrt:
782          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
783          return llvm_intrinsic_unop(ir->operation, ops[0]);
784       case ir_unop_rsq:
785          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
786          return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), llvm_intrinsic_unop(ir_unop_sqrt, ops[0]), "rsqrt.rcp");
787       case ir_unop_i2f:
788          return bld.CreateSIToFP(ops[0], llvm_type(ir->type));
789       case ir_unop_u2f:
790       case ir_unop_b2f:
791          return bld.CreateUIToFP(ops[0], llvm_type(ir->type));
792       case ir_unop_b2i:
793          return bld.CreateZExt(ops[0], llvm_type(ir->type));
794       case ir_unop_f2i:
795          return bld.CreateFPToSI(ops[0], llvm_type(ir->type));
796       case ir_unop_f2b:
797          return bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0));
798       case ir_unop_i2b:
799          return bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0));
800       case ir_unop_trunc:
801       {
802          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
803             return ops[0];
804          glsl_type int_type = *ir->operands[0]->type;
805          int_type.base_type = GLSL_TYPE_INT;
806          return bld.CreateSIToFP(bld.CreateFPToSI(ops[0], llvm_type(&int_type), "trunc.fptosi"),ops[0]->getType(), "trunc.sitofp");
807       }
808       case ir_unop_floor:
809       {
810          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
811             return ops[0];
812          llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
813          return bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one));
814       }
815       case ir_unop_ceil:
816       {
817          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
818             return ops[0];
819          llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
820          return bld.CreateFAdd(bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one)), one);
821       }
822       case ir_unop_fract:
823       {
824          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
825             return llvm_imm(ops[0]->getType(), 0);
826          llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
827          return bld.CreateFRem(ops[0], one);
828       }
829       // TODO: NaNs might be wrong in min/max, not sure how to fix it
830       case ir_binop_min:
831          switch(ir->operands[0]->type->base_type)
832          {
833          case GLSL_TYPE_BOOL:
834             return bld.CreateAnd(ops[0], ops[1], "bmin");
835          case GLSL_TYPE_UINT:
836             return bld.CreateSelect(bld.CreateICmpULE(ops[0], ops[1], "umin.le"), ops[0], ops[1], "umin.select");
837          case GLSL_TYPE_INT:
838             return bld.CreateSelect(bld.CreateICmpSLE(ops[0], ops[1], "smin.le"), ops[0], ops[1], "smin.select");
839          case GLSL_TYPE_FLOAT:
840             return bld.CreateSelect(bld.CreateFCmpULE(ops[0], ops[1], "fmin.le"), ops[0], ops[1], "fmin.select");
841          default:
842             assert(0);
843          }
844       case ir_binop_max:
845          switch(ir->operands[0]->type->base_type)
846          {
847          case GLSL_TYPE_BOOL:
848             return bld.CreateOr(ops[0], ops[1], "bmax");
849          case GLSL_TYPE_UINT:
850             return bld.CreateSelect(bld.CreateICmpUGE(ops[0], ops[1], "umax.ge"), ops[0], ops[1], "umax.select");
851          case GLSL_TYPE_INT:
852             return bld.CreateSelect(bld.CreateICmpSGE(ops[0], ops[1], "smax.ge"), ops[0], ops[1], "smax.select");
853          case GLSL_TYPE_FLOAT:
854             return bld.CreateSelect(bld.CreateFCmpUGE(ops[0], ops[1], "fmax.ge"), ops[0], ops[1], "fmax.select");
855          default:
856             assert(0);
857          }
858       case ir_binop_pow:
859          assert(GLSL_TYPE_FLOAT == ir->operands[0]->type->base_type);
860          assert(GLSL_TYPE_FLOAT == ir->operands[1]->type->base_type);
861          return llvm_intrinsic_binop(ir_binop_pow, ops[0], ops[1]);
862       case ir_unop_bit_not:
863          return bld.CreateNot(ops[0]);
864       case ir_binop_bit_and:
865          return bld.CreateAnd(ops[0], ops[1]);
866       case ir_binop_bit_xor:
867          return bld.CreateXor(ops[0], ops[1]);
868       case ir_binop_bit_or:
869          return bld.CreateOr(ops[0], ops[1]);
870       case ir_binop_lshift:
871          switch(ir->operands[0]->type->base_type)
872          {
873          case GLSL_TYPE_BOOL:
874          case GLSL_TYPE_UINT:
875          case GLSL_TYPE_INT:
876             return bld.CreateLShr(ops[0], ops[1]);
877          default:
878             assert(0);
879          }
880       case ir_binop_rshift:
881          switch(ir->operands[0]->type->base_type)
882          {
883          case GLSL_TYPE_BOOL:
884          case GLSL_TYPE_UINT:
885             return bld.CreateLShr(ops[0], ops[1]);
886          case GLSL_TYPE_INT:
887             return bld.CreateAShr(ops[0], ops[1]);
888          default:
889             assert(0);
890             return 0;
891          }
892       default:
893          printf("ir->operation=%d \n", ir->operation);
894          assert(0);
895          return 0;
896       }
897    }
898 
visit(class ir_expression * ir)899    virtual void visit(class ir_expression * ir)
900    {
901       result = llvm_expression(ir);
902    }
903 
visit(class ir_dereference_array * ir)904    virtual void visit(class ir_dereference_array *ir)
905    {
906       result = bld.CreateLoad(llvm_pointer(ir));
907    }
908 
visit(class ir_dereference_record * ir)909    virtual void visit(class ir_dereference_record *ir)
910    {
911       result = bld.CreateLoad(llvm_pointer(ir));
912    }
913 
visit(class ir_dereference_variable * ir)914    virtual void visit(class ir_dereference_variable *ir)
915    {
916       result = bld.CreateLoad(llvm_pointer(ir), ir->variable_referenced()->name);
917    }
918 
visit(class ir_texture * ir)919    virtual void visit(class ir_texture * ir)
920    {
921       llvm::Value * coordinate = llvm_value(ir->coordinate);
922       if (ir->projector)
923       {
924          llvm::Value * proj = llvm_value(ir->projector);
925          unsigned width = ((llvm::VectorType*)coordinate->getType())->getNumElements();
926          llvm::Value * div = llvm::Constant::getNullValue(coordinate->getType());
927          for (unsigned i = 0; i < width; i++)
928             div = bld.CreateInsertElement(div, proj, bld.getInt32(i), "texProjDup");
929          coordinate = bld.CreateFDiv(coordinate, div, "texProj");
930       }
931 
932       ir_variable * sampler = NULL;
933       if(ir_dereference_variable* deref = ir->sampler->as_dereference_variable())
934          sampler = deref->variable_referenced();
935       else if(ir_dereference_array* deref = ir->sampler->as_dereference_array())
936       {
937          assert(0); // not implemented
938          return;
939          deref->array_index;
940          deref->array;
941       }
942       else if(ir->sampler->as_dereference())
943       {
944          assert(0); // not implemented
945          ir_dereference_record* deref = (ir_dereference_record*)ir->sampler;
946          int idx = deref->record->type->field_index(deref->field);
947          assert(idx >= 0);
948       }
949       else
950          assert(0);
951 
952       assert(sampler->location >= 0 && sampler->location < 64); // TODO: proper limit
953 
954       // ESSL texture LOD is only for 2D texture in vert shader, and it's explicit
955       // bias used only in frag shader, and added to computed LOD
956       assert(ir_tex == ir->op);
957 
958       assert(GLSL_TYPE_FLOAT == sampler->type->sampler_type);
959       printf("sampler '%s' location=%d dim=%d type=%d proj=%d lod=%d \n", sampler->name, sampler->location,
960          sampler->type->sampler_dimensionality, sampler->type->sampler_type,
961          ir->projector ? 1 : 0, ir->lod_info.lod ? 1 : 0);
962       if (GLSL_SAMPLER_DIM_CUBE == sampler->type->sampler_dimensionality)
963          result = texCube(bld, coordinate, sampler->location, gglCtx);
964       else if (GLSL_SAMPLER_DIM_2D == sampler->type->sampler_dimensionality)
965          result = tex2D(bld, coordinate, sampler->location, gglCtx);
966       else
967          assert(0);
968    }
969 
visit(class ir_discard * ir)970    virtual void visit(class ir_discard * ir)
971    {
972       llvm::BasicBlock* discard = llvm::BasicBlock::Create(ctx, "discard", fun);
973       llvm::BasicBlock* after;
974       if(ir->condition)
975       {
976          after = llvm::BasicBlock::Create(ctx, "discard.survived", fun);
977          bld.CreateCondBr(llvm_value(ir->condition), discard, after);
978       }
979       else
980       {
981          after = llvm::BasicBlock::Create(ctx, "dead_code.discard", fun);
982          bld.CreateBr(discard);
983       }
984 
985       bld.SetInsertPoint(discard);
986 
987       // FIXME: According to the LLVM mailing list, UnwindInst should not
988       // be used by the frontend since LLVM 3.0, and 'CreateUnwind'
989       // method has been removed from the IRBuilder.  Here's the
990       // temporary workaround.  But it would be better to remove
991       // this in the future.
992       //
993       // A solution after LLVM 3.0: To add a global boolean in the shader to
994       // store whether it was discarded or not and just continue on normally,
995       // and handle the discard outside the shader, in the scanline function.
996       // The discard instruction is not used frequently, so it should be okay
997       // performance wise.
998       //new llvm::UnwindInst(ctx, discard); /// Deprecated
999 
1000       bb = after;
1001       bld.SetInsertPoint(bb);
1002    }
1003 
visit(class ir_loop_jump * ir)1004    virtual void visit(class ir_loop_jump *ir)
1005    {
1006       llvm::BasicBlock* target;
1007       if(ir->mode == ir_loop_jump::jump_continue)
1008          target = loop.first;
1009       else if(ir->mode == ir_loop_jump::jump_break)
1010          target = loop.second;
1011       assert(target);
1012 
1013       bld.CreateBr(target);
1014 
1015       bb = llvm::BasicBlock::Create(ctx, "dead_code.jump", fun);
1016       bld.SetInsertPoint(bb);
1017    }
1018 
visit(class ir_loop * ir)1019    virtual void visit(class ir_loop * ir)
1020    {
1021       llvm::BasicBlock* body = llvm::BasicBlock::Create(ctx, "loop", fun);
1022       llvm::BasicBlock* header = body;
1023       llvm::BasicBlock* after = llvm::BasicBlock::Create(ctx, "loop.after", fun);
1024       llvm::Value* ctr;
1025 
1026       if(ir->counter)
1027       {
1028          ctr = llvm_variable(ir->counter);
1029          if(ir->from)
1030             bld.CreateStore(llvm_value(ir->from), ctr);
1031          if(ir->to)
1032             header = llvm::BasicBlock::Create(ctx, "loop.header", fun);
1033       }
1034 
1035       bld.CreateBr(header);
1036 
1037       if(ir->counter && ir->to)
1038       {
1039          bld.SetInsertPoint(header);
1040          llvm::Value* cond;
1041          llvm::Value* load = bld.CreateLoad(ctr);
1042          llvm::Value* to = llvm_value(ir->to);
1043          switch(ir->counter->type->base_type)
1044          {
1045          case GLSL_TYPE_BOOL:
1046          case GLSL_TYPE_UINT:
1047             cond = bld.CreateICmpULT(load, to);
1048             break;
1049          case GLSL_TYPE_INT:
1050             cond = bld.CreateICmpSLT(load, to);
1051             break;
1052          case GLSL_TYPE_FLOAT:
1053             cond = bld.CreateFCmpOLT(load, to);
1054             break;
1055          }
1056          bld.CreateCondBr(cond, body, after);
1057       }
1058 
1059       bld.SetInsertPoint(body);
1060 
1061       std::pair<llvm::BasicBlock*, llvm::BasicBlock*> saved_loop = loop;
1062       loop = std::make_pair(header, after);
1063       visit_exec_list(&ir->body_instructions, this);
1064       loop = saved_loop;
1065 
1066       if(ir->counter && ir->increment)
1067       {
1068          switch(ir->counter->type->base_type)
1069          {
1070          case GLSL_TYPE_BOOL:
1071          case GLSL_TYPE_UINT:
1072          case GLSL_TYPE_INT:
1073             bld.CreateStore(bld.CreateAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr);
1074             break;
1075          case GLSL_TYPE_FLOAT:
1076             bld.CreateStore(bld.CreateFAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr);
1077             break;
1078          }
1079       }
1080       bld.CreateBr(header);
1081 
1082       bb = after;
1083       bld.SetInsertPoint(bb);
1084    }
1085 
visit(class ir_if * ir)1086    virtual void visit(class ir_if *ir)
1087    {
1088       llvm::BasicBlock* bbt = llvm::BasicBlock::Create(ctx, "if", fun);
1089       llvm::BasicBlock* bbf = llvm::BasicBlock::Create(ctx, "else", fun);
1090       llvm::BasicBlock* bbe = llvm::BasicBlock::Create(ctx, "endif", fun);
1091       bld.CreateCondBr(llvm_value(ir->condition), bbt, bbf);
1092 
1093       bld.SetInsertPoint(bbt);
1094       visit_exec_list(&ir->then_instructions, this);
1095       bld.CreateBr(bbe);
1096 
1097       bld.SetInsertPoint(bbf);
1098       visit_exec_list(&ir->else_instructions, this);
1099       bld.CreateBr(bbe);
1100 
1101       bb = bbe;
1102       bld.SetInsertPoint(bb);
1103    }
1104 
visit(class ir_return * ir)1105    virtual void visit(class ir_return * ir)
1106    {
1107       if(!ir->value)
1108          bld.CreateRetVoid();
1109       else
1110          bld.CreateRet(llvm_value(ir->value));
1111 
1112       bb = llvm::BasicBlock::Create(ctx, "dead_code.return", fun);
1113       bld.SetInsertPoint(bb);
1114    }
1115 
visit(class ir_call * ir)1116    virtual void visit(class ir_call * ir)
1117    {
1118       std::vector<llvm::Value*> args;
1119 
1120       foreach_iter(exec_list_iterator, iter, *ir)
1121       {
1122          ir_rvalue *arg = (ir_constant *)iter.get();
1123          args.push_back(llvm_value(arg));
1124       }
1125 
1126       result = bld.CreateCall(llvm_function(ir->get_callee()), llvm::ArrayRef<llvm::Value*>(args));
1127 
1128       llvm::AttrListPtr attr;
1129       ((llvm::CallInst*)result)->setAttributes(attr);
1130    }
1131 
visit(class ir_constant * ir)1132    virtual void visit(class ir_constant * ir)
1133    {
1134       if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1135          std::vector<llvm::Constant*> fields;
1136          foreach_iter(exec_list_iterator, iter, ir->components) {
1137             ir_constant *field = (ir_constant *)iter.get();
1138             fields.push_back(llvm_constant(field));
1139          }
1140          result = llvm::ConstantStruct::get((llvm::StructType*)llvm_type(ir->type), fields);
1141       }
1142       else if (ir->type->base_type == GLSL_TYPE_ARRAY) {
1143          std::vector<llvm::Constant*> elems;
1144          for (unsigned i = 0; i < ir->type->length; i++)
1145             elems.push_back(llvm_constant(ir->array_elements[i]));
1146          result = llvm::ConstantArray::get((llvm::ArrayType*)llvm_type(ir->type), elems);
1147       }
1148       else
1149       {
1150          llvm::Type* base_type = llvm_base_type(ir->type->base_type);
1151          llvm::Type* vec_type = llvm_vec_type(ir->type);
1152          llvm::Type* type = llvm_type(ir->type);
1153 
1154          std::vector<llvm::Constant*> vecs;
1155          unsigned idx = 0;
1156          for (unsigned i = 0; i < ir->type->matrix_columns; ++i) {
1157             std::vector<llvm::Constant*> elems;
1158             for (unsigned j = 0; j < ir->type->vector_elements; ++j) {
1159                llvm::Constant* elem;
1160                switch(ir->type->base_type)
1161                {
1162                case GLSL_TYPE_FLOAT:
1163                   elem = llvm::ConstantFP::get(base_type, ir->value.f[idx]);
1164                   break;
1165                case GLSL_TYPE_UINT:
1166                   elem = llvm::ConstantInt::get(base_type, ir->value.u[idx]);
1167                   break;
1168                case GLSL_TYPE_INT:
1169                   elem = llvm::ConstantInt::get(base_type, ir->value.i[idx]);
1170                   break;
1171                case GLSL_TYPE_BOOL:
1172                   elem = llvm::ConstantInt::get(base_type, ir->value.b[idx]);
1173                   break;
1174                }
1175                elems.push_back(elem);
1176                ++idx;
1177             }
1178 
1179             llvm::Constant* vec;
1180             if(ir->type->vector_elements > 1) {
1181                llvm::ArrayRef<llvm::Constant*> ConstantArray(elems);
1182                vec = llvm::ConstantVector::get(ConstantArray);
1183             } else {
1184                vec = elems[0];
1185             }
1186             vecs.push_back(vec);
1187          }
1188 
1189          if(ir->type->matrix_columns > 1)
1190             result = llvm::ConstantArray::get((llvm::ArrayType*)type, vecs);
1191          else
1192             result = vecs[0];
1193       }
1194    }
1195 
llvm_shuffle(llvm::Value * val,int * shuffle_mask,unsigned res_width,const llvm::Twine & name="")1196    llvm::Value* llvm_shuffle(llvm::Value* val, int* shuffle_mask, unsigned res_width, const llvm::Twine &name = "")
1197    {
1198       llvm::Type* elem_type = val->getType();
1199       llvm::Type* res_type = elem_type;;
1200       unsigned val_width = 1;
1201       if(val->getType()->isVectorTy())
1202       {
1203          val_width = ((llvm::VectorType*)val->getType())->getNumElements();
1204          elem_type = ((llvm::VectorType*)val->getType())->getElementType();
1205       }
1206       if(res_width > 1)
1207          res_type = llvm::VectorType::get(elem_type, res_width);
1208 
1209       llvm::Constant* shuffle_mask_values[4];
1210       assert(res_width <= 4);
1211       bool any_def = false;
1212       for(unsigned i = 0; i < res_width; ++i)
1213       {
1214          if(shuffle_mask[i] < 0)
1215             shuffle_mask_values[i] = llvm::UndefValue::get(llvm::Type::getInt32Ty(ctx));
1216          else
1217          {
1218             any_def = true;
1219             shuffle_mask_values[i] = llvm_int(shuffle_mask[i]);
1220          }
1221       }
1222 
1223       llvm::Value* undef = llvm::UndefValue::get(res_type);
1224       if(!any_def)
1225          return undef;
1226 
1227       if(val_width > 1)
1228       {
1229          if(res_width > 1)
1230          {
1231             if(val_width == res_width)
1232             {
1233                bool nontrivial = false;
1234                for(unsigned i = 0; i < val_width; ++i)
1235                {
1236                   if(shuffle_mask[i] != (int)i)
1237                      nontrivial = true;
1238                }
1239                if(!nontrivial)
1240                   return val;
1241             }
1242 
1243             return bld.CreateShuffleVector(val, llvm::UndefValue::get(val->getType()), llvm::ConstantVector::get(pack(shuffle_mask_values, res_width)), name);
1244          }
1245          else
1246             return bld.CreateExtractElement(val, llvm_int(shuffle_mask[0]), name);
1247       }
1248       else
1249       {
1250          if(res_width > 1)
1251          {
1252             llvm::Value* tmp = undef;
1253             for(unsigned i = 0; i < res_width; ++i)
1254             {
1255                if(shuffle_mask[i] >= 0)
1256                tmp = bld.CreateInsertElement(tmp, val, llvm_int(i), name);
1257             }
1258             return tmp;
1259          }
1260          else if(shuffle_mask[0] >= 0)
1261             return val;
1262          else
1263             return undef;
1264       }
1265    }
1266 
1267 
visit(class ir_swizzle * swz)1268    virtual void visit(class ir_swizzle * swz)
1269    {
1270       llvm::Value* val = llvm_value(swz->val);
1271       int mask[4] = {swz->mask.x, swz->mask.y, swz->mask.z, swz->mask.w};
1272       result = llvm_shuffle(val, mask, swz->mask.num_components, "swizzle");
1273    }
1274 
visit(class ir_assignment * ir)1275    virtual void visit(class ir_assignment * ir)
1276    {
1277       llvm::Value* lhs = llvm_pointer(ir->lhs);
1278       llvm::Value* rhs = llvm_value(ir->rhs);
1279       unsigned width = ir->lhs->type->vector_elements;
1280       unsigned mask = (1 << width) - 1;
1281       assert(rhs);
1282 
1283       // TODO: masking for matrix assignment
1284       if (ir->rhs->type->is_matrix()) {
1285          bld.CreateStore(rhs, lhs, "mat_str");
1286          return;
1287       }
1288 
1289       if (!(ir->write_mask & mask))
1290          return;
1291 
1292       if (ir->rhs->type->vector_elements < width) {
1293          int expand_mask[4] = {-1, -1, -1, -1};
1294          for (unsigned i = 0; i < ir->lhs->type->vector_elements; ++i)
1295             expand_mask[i] = i;
1296 //         printf("ve: %u w %u issw: %i\n", ir->rhs->type->vector_elements, width, !!ir->rhs->as_swizzle());
1297          rhs = llvm_shuffle(rhs, expand_mask, width, "assign.expand");
1298       }
1299 
1300       if (width > 1 && (ir->write_mask & mask) != mask) {
1301          llvm::Constant* blend_mask[4];
1302          // refer to ir.h: ir_assignment::write_mask
1303          // A partially-set write mask means that each enabled channel gets
1304          // the value from a consecutive channel of the rhs.
1305          unsigned rhsChannel = 0;
1306          for (unsigned i = 0; i < width; ++i) {
1307             if (ir->write_mask & (1 << i))
1308                blend_mask[i] = llvm_int(width + rhsChannel++);
1309             else
1310                blend_mask[i] = llvm_int(i);
1311          }
1312          rhs = bld.CreateShuffleVector(bld.CreateLoad(lhs), rhs, llvm::ConstantVector::get(pack(blend_mask, width)), "assign.writemask");
1313       }
1314 
1315       if(ir->condition)
1316          rhs = bld.CreateSelect(llvm_value(ir->condition), rhs, bld.CreateLoad(lhs), "assign.conditional");
1317 
1318       bld.CreateStore(rhs, lhs);
1319    }
1320 
visit(class ir_variable * var)1321    virtual void visit(class ir_variable * var)
1322    {
1323       llvm_variable(var);
1324    }
1325 
visit(ir_function_signature * sig)1326    virtual void visit(ir_function_signature *sig)
1327    {
1328       if(!sig->is_defined)
1329          return;
1330 
1331       assert(!fun);
1332       fun = llvm_function(sig);
1333 
1334       bb = llvm::BasicBlock::Create(ctx, "entry", fun);
1335       bld.SetInsertPoint(bb);
1336 
1337       llvm::Function::arg_iterator ai = fun->arg_begin();
1338       if (!strcmp("main",sig->function_name()))
1339       {
1340          assert(3 == fun->arg_size());
1341          bld.CreateStore(ai, inputsPtr);
1342          inputs = ai;
1343          ai++;
1344          bld.CreateStore(ai, outputsPtr);
1345          outputs = ai;
1346          ai++;
1347          bld.CreateStore(ai, constantsPtr);
1348          constants = ai;
1349          ai++;
1350       }
1351       else
1352       {
1353          foreach_iter(exec_list_iterator, iter, sig->parameters) {
1354             ir_variable* arg = (ir_variable*)iter.get();
1355             ai->setName(arg->name);
1356             bld.CreateStore(ai, llvm_variable(arg));
1357             ++ai;
1358          }
1359          inputs = bld.CreateLoad(inputsPtr);
1360          outputs = bld.CreateLoad(outputsPtr);
1361          constants = bld.CreateLoad(constantsPtr);
1362       }
1363       inputs->setName("gl_inputs");
1364       outputs->setName("gl_outputs");
1365       constants->setName("gl_constants");
1366 
1367 
1368 
1369       foreach_iter(exec_list_iterator, iter, sig->body) {
1370          ir_instruction *ir = (ir_instruction *)iter.get();
1371 
1372          ir->accept(this);
1373       }
1374 
1375       if(fun->getReturnType()->isVoidTy())
1376          bld.CreateRetVoid();
1377       else
1378          bld.CreateRet(llvm::UndefValue::get(fun->getReturnType()));
1379 
1380       bb = NULL;
1381       fun = NULL;
1382    }
1383 
visit(class ir_function * funs)1384    virtual void visit(class ir_function * funs)
1385    {
1386       foreach_iter(exec_list_iterator, iter, *funs)
1387       {
1388          ir_function_signature* sig = (ir_function_signature*)iter.get();
1389          sig->accept(this);
1390       }
1391    }
1392 };
1393 
1394 struct llvm::Module *
glsl_ir_to_llvm_module(struct exec_list * ir,llvm::Module * mod,const struct GGLState * gglCtx,const char * shaderSuffix)1395 glsl_ir_to_llvm_module(struct exec_list *ir, llvm::Module * mod,
1396                         const struct GGLState * gglCtx, const char * shaderSuffix)
1397 {
1398    ir_to_llvm_visitor v(mod, gglCtx, shaderSuffix);
1399 
1400    visit_exec_list(ir, &v);
1401 
1402 //   mod->dump();
1403    if(llvm::verifyModule(*mod, llvm::PrintMessageAction, 0))
1404    {
1405       puts("**\n module verification failed **\n");
1406       mod->dump();
1407       assert(0);
1408       return NULL;
1409    }
1410 
1411    return mod;
1412    //v.ir_to_llvm_emit_op1(NULL, OPCODE_END, ir_to_llvm_undef_dst, ir_to_llvm_undef);
1413 }
1414