• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "bcc/Assert.h"
18 #include "bcc/Renderscript/RSTransforms.h"
19 
20 #include <cstdlib>
21 
22 #include <llvm/IR/DerivedTypes.h>
23 #include <llvm/IR/Function.h>
24 #include <llvm/IR/Instructions.h>
25 #include <llvm/IR/IRBuilder.h>
26 #include <llvm/IR/MDBuilder.h>
27 #include <llvm/IR/Module.h>
28 #include <llvm/Pass.h>
29 #include <llvm/Support/raw_ostream.h>
30 #include <llvm/IR/DataLayout.h>
31 #include <llvm/IR/Function.h>
32 #include <llvm/IR/Type.h>
33 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
34 
35 #include "bcc/Config/Config.h"
36 #include "bcc/Support/Log.h"
37 
38 #include "bcinfo/MetadataExtractor.h"
39 
// Number of formal parameters of every generated "<NAME>.expand" function:
// (p, x1, x2, instep, outstep).
#define NUM_EXPANDED_FUNCTION_PARAMS 5
41 
42 using namespace bcc;
43 
44 namespace {
45 
// Compile-time switch for RenderScript TBAA: when true, expanded kernels get
// "tbaa" metadata attached to their loads/stores and the pass may connect the
// RenderScript TBAA tree to the C/C++ one.
static const bool gEnableRsTbaa = true;
47 
48 /* RSForEachExpandPass - This pass operates on functions that are able to be
49  * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
50  * ForEach-able function to be invoked over the appropriate data cells of the
51  * input/output allocations (adjusting other relevant parameters as we go). We
52  * support doing this for any ForEach-able compute kernels. The new function
53  * name is the original function name followed by ".expand". Note that we
54  * still generate code for the original function.
55  */
56 class RSForEachExpandPass : public llvm::ModulePass {
57 private:
58   static char ID;
59 
60   llvm::Module *Module;
61   llvm::LLVMContext *Context;
62 
63   /*
64    * Pointer to LLVM type information for the ForEachStubType and the function
65    * signature for expanded kernels.  These must be re-calculated for each
66    * module the pass is run on.
67    */
68   llvm::StructType   *ForEachStubType;
69   llvm::FunctionType *ExpandedFunctionType;
70 
71   uint32_t mExportForEachCount;
72   const char **mExportForEachNameList;
73   const uint32_t *mExportForEachSignatureList;
74 
75   // Turns on optimization of allocation stride values.
76   bool mEnableStepOpt;
77 
getRootSignature(llvm::Function * Function)78   uint32_t getRootSignature(llvm::Function *Function) {
79     const llvm::NamedMDNode *ExportForEachMetadata =
80         Module->getNamedMetadata("#rs_export_foreach");
81 
82     if (!ExportForEachMetadata) {
83       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
84       for (llvm::Function::arg_iterator B = Function->arg_begin(),
85                                         E = Function->arg_end();
86            B != E;
87            ++B) {
88         RootArgTys.push_back(B->getType());
89       }
90 
91       // For pre-ICS bitcode, we may not have signature information. In that
92       // case, we use the size of the RootArgTys to select the number of
93       // arguments.
94       return (1 << RootArgTys.size()) - 1;
95     }
96 
97     if (ExportForEachMetadata->getNumOperands() == 0) {
98       return 0;
99     }
100 
101     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
102 
103     // We only handle the case for legacy root() functions here, so this is
104     // hard-coded to look at only the first such function.
105     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
106     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
107       llvm::Value *SigVal = SigNode->getOperand(0);
108       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
109         llvm::StringRef SigString =
110             static_cast<llvm::MDString*>(SigVal)->getString();
111         uint32_t Signature = 0;
112         if (SigString.getAsInteger(10, Signature)) {
113           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
114           return 0;
115         }
116         return Signature;
117       }
118     }
119 
120     return 0;
121   }
122 
123   // Get the actual value we should use to step through an allocation.
124   //
125   // Normally the value we use to step through an allocation is given to us by
126   // the driver. However, for certain primitive data types, we can derive an
127   // integer constant for the step value. We use this integer constant whenever
128   // possible to allow further compiler optimizations to take place.
129   //
130   // DL - Target Data size/layout information.
131   // T - Type of allocation (should be a pointer).
132   // OrigStep - Original step increment (root.expand() input from driver).
getStepValue(llvm::DataLayout * DL,llvm::Type * AllocType,llvm::Value * OrigStep)133   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
134                             llvm::Value *OrigStep) {
135     bccAssert(DL);
136     bccAssert(AllocType);
137     bccAssert(OrigStep);
138     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
139     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
140     if (mEnableStepOpt && AllocType != VoidPtrTy && PT) {
141       llvm::Type *ET = PT->getElementType();
142       uint64_t ETSize = DL->getTypeAllocSize(ET);
143       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
144       return llvm::ConstantInt::get(Int32Ty, ETSize);
145     } else {
146       return OrigStep;
147     }
148   }
149 
150   /// @brief Builds the types required by the pass for the given context.
buildTypes(void)151   void buildTypes(void) {
152     // Create the RsForEachStubParam struct.
153 
154     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
155     llvm::Type *Int32Ty   = llvm::Type::getInt32Ty(*Context);
156     /* Defined in frameworks/base/libs/rs/rs_hal.h:
157      *
158      * struct RsForEachStubParamStruct {
159      *   const void *in;
160      *   void *out;
161      *   const void *usr;
162      *   uint32_t usr_len;
163      *   uint32_t x;
164      *   uint32_t y;
165      *   uint32_t z;
166      *   uint32_t lod;
167      *   enum RsAllocationCubemapFace face;
168      *   uint32_t ar[16];
169      *   const void **ins;
170      *   uint32_t *eStrideIns;
171      * };
172      */
173     llvm::SmallVector<llvm::Type*, 16> StructTypes;
174     StructTypes.push_back(VoidPtrTy);  // const void *in
175     StructTypes.push_back(VoidPtrTy);  // void *out
176     StructTypes.push_back(VoidPtrTy);  // const void *usr
177     StructTypes.push_back(Int32Ty);    // uint32_t usr_len
178     StructTypes.push_back(Int32Ty);    // uint32_t x
179     StructTypes.push_back(Int32Ty);    // uint32_t y
180     StructTypes.push_back(Int32Ty);    // uint32_t z
181     StructTypes.push_back(Int32Ty);    // uint32_t lod
182     StructTypes.push_back(Int32Ty);    // enum RsAllocationCubemapFace
183     StructTypes.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
184 
185     StructTypes.push_back(llvm::PointerType::getUnqual(VoidPtrTy)); // const void **ins
186     StructTypes.push_back(Int32Ty->getPointerTo()); // uint32_t *eStrideIns
187 
188     ForEachStubType =
189       llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
190 
191     // Create the function type for expanded kernels.
192 
193     llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
194 
195     llvm::SmallVector<llvm::Type*, 8> ParamTypes;
196     ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
197     ParamTypes.push_back(Int32Ty);          // uint32_t x1
198     ParamTypes.push_back(Int32Ty);          // uint32_t x2
199     ParamTypes.push_back(Int32Ty);          // uint32_t instep
200     ParamTypes.push_back(Int32Ty);          // uint32_t outstep
201 
202     ExpandedFunctionType = llvm::FunctionType::get(llvm::Type::getVoidTy(*Context),
203                                               ParamTypes,
204                                               false);
205   }
206 
207   /// @brief Create skeleton of the expanded function.
208   ///
209   /// This creates a function with the following signature:
210   ///
211   ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
212   ///         uint32_t instep, uint32_t outstep)
213   ///
createEmptyExpandedFunction(llvm::StringRef OldName)214   llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
215     llvm::Function *ExpandedFunction =
216       llvm::Function::Create(ExpandedFunctionType,
217                              llvm::GlobalValue::ExternalLinkage,
218                              OldName + ".expand", Module);
219 
220     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
221 
222     llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
223 
224     (AI++)->setName("p");
225     (AI++)->setName("x1");
226     (AI++)->setName("x2");
227     (AI++)->setName("arg_instep");
228     (AI++)->setName("arg_outstep");
229 
230     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
231                                                        ExpandedFunction);
232     llvm::IRBuilder<> Builder(Begin);
233     Builder.CreateRetVoid();
234 
235     return ExpandedFunction;
236   }
237 
238   /// @brief Create an empty loop
239   ///
240   /// Create a loop of the form:
241   ///
242   /// for (i = LowerBound; i < UpperBound; i++)
243   ///   ;
244   ///
245   /// After the loop has been created, the builder is set such that
246   /// instructions can be added to the loop body.
247   ///
248   /// @param Builder The builder to use to build this loop. The current
249   ///                position of the builder is the position the loop
250   ///                will be inserted.
251   /// @param LowerBound The first value of the loop iterator
252   /// @param UpperBound The maximal value of the loop iterator
253   /// @param LoopIV A reference that will be set to the loop iterator.
254   /// @return The BasicBlock that will be executed after the loop.
createLoop(llvm::IRBuilder<> & Builder,llvm::Value * LowerBound,llvm::Value * UpperBound,llvm::PHINode ** LoopIV)255   llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
256                                llvm::Value *LowerBound,
257                                llvm::Value *UpperBound,
258                                llvm::PHINode **LoopIV) {
259     assert(LowerBound->getType() == UpperBound->getType());
260 
261     llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
262     llvm::Value *Cond, *IVNext;
263     llvm::PHINode *IV;
264 
265     CondBB = Builder.GetInsertBlock();
266     AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
267     HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
268 
269     // if (LowerBound < Upperbound)
270     //   goto LoopHeader
271     // else
272     //   goto AfterBB
273     CondBB->getTerminator()->eraseFromParent();
274     Builder.SetInsertPoint(CondBB);
275     Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
276     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
277 
278     // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
279     // iv.next = iv + 1
280     // if (iv.next < Upperbound)
281     //   goto LoopHeader
282     // else
283     //   goto AfterBB
284     Builder.SetInsertPoint(HeaderBB);
285     IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
286     IV->addIncoming(LowerBound, CondBB);
287     IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
288     IV->addIncoming(IVNext, HeaderBB);
289     Cond = Builder.CreateICmpULT(IVNext, UpperBound);
290     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
291     AfterBB->setName("Exit");
292     Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
293     *LoopIV = IV;
294     return AfterBB;
295   }
296 
297 public:
RSForEachExpandPass(bool pEnableStepOpt)298   RSForEachExpandPass(bool pEnableStepOpt)
299       : ModulePass(ID), Module(NULL), Context(NULL),
300         mEnableStepOpt(pEnableStepOpt) {
301 
302   }
303 
304   /* Performs the actual optimization on a selected function. On success, the
305    * Module will contain a new function of the name "<NAME>.expand" that
306    * invokes <NAME>() in a loop with the appropriate parameters.
307    */
ExpandFunction(llvm::Function * Function,uint32_t Signature)308   bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
309     ALOGV("Expanding ForEach-able Function %s",
310           Function->getName().str().c_str());
311 
312     if (!Signature) {
313       Signature = getRootSignature(Function);
314       if (!Signature) {
315         // We couldn't determine how to expand this function based on its
316         // function signature.
317         return false;
318       }
319     }
320 
321     llvm::DataLayout DL(Module);
322 
323     llvm::Function *ExpandedFunction =
324       createEmptyExpandedFunction(Function->getName());
325 
326     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
327 
328     /*
329      * Extract the expanded function's parameters.  It is guaranteed by
330      * createEmptyExpandedFunction that there will be five parameters.
331      */
332     llvm::Function::arg_iterator ExpandedFunctionArgIter =
333       ExpandedFunction->arg_begin();
334 
335     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
336     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
337     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
338     llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
339     llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
340 
341     llvm::Value *InStep  = NULL;
342     llvm::Value *OutStep = NULL;
343 
344     // Construct the actual function body.
345     llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
346 
347     // Collect and construct the arguments for the kernel().
348     // Note that we load any loop-invariant arguments before entering the Loop.
349     llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
350 
351     llvm::Type *InTy = NULL;
352     llvm::Value *InBasePtr = NULL;
353     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
354       InTy = (FunctionArgIter++)->getType();
355       InStep = getStepValue(&DL, InTy, Arg_instep);
356       InStep->setName("instep");
357       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
358     }
359 
360     llvm::Type *OutTy = NULL;
361     llvm::Value *OutBasePtr = NULL;
362     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
363       OutTy = (FunctionArgIter++)->getType();
364       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
365       OutStep->setName("outstep");
366       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
367     }
368 
369     llvm::Value *UsrData = NULL;
370     if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
371       llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
372       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
373           Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
374       UsrData->setName("UsrData");
375     }
376 
377     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
378       FunctionArgIter++;
379     }
380 
381     llvm::Value *Y = NULL;
382     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
383       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
384       FunctionArgIter++;
385     }
386 
387     bccAssert(FunctionArgIter == Function->arg_end());
388 
389     llvm::PHINode *IV;
390     createLoop(Builder, Arg_x1, Arg_x2, &IV);
391 
392     // Populate the actual call to kernel().
393     llvm::SmallVector<llvm::Value*, 8> RootArgs;
394 
395     llvm::Value *InPtr  = NULL;
396     llvm::Value *OutPtr = NULL;
397 
398     // Calculate the current input and output pointers
399     //
400     // We always calculate the input/output pointers with a GEP operating on i8
401     // values and only cast at the very end to OutTy. This is because the step
402     // between two values is given in bytes.
403     //
404     // TODO: We could further optimize the output by using a GEP operation of
405     // type 'OutTy' in cases where the element type of the allocation allows.
406     if (OutBasePtr) {
407       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
408       OutOffset = Builder.CreateMul(OutOffset, OutStep);
409       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
410       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
411     }
412 
413     if (InBasePtr) {
414       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
415       InOffset = Builder.CreateMul(InOffset, InStep);
416       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
417       InPtr = Builder.CreatePointerCast(InPtr, InTy);
418     }
419 
420     if (InPtr) {
421       RootArgs.push_back(InPtr);
422     }
423 
424     if (OutPtr) {
425       RootArgs.push_back(OutPtr);
426     }
427 
428     if (UsrData) {
429       RootArgs.push_back(UsrData);
430     }
431 
432     llvm::Value *X = IV;
433     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
434       RootArgs.push_back(X);
435     }
436 
437     if (Y) {
438       RootArgs.push_back(Y);
439     }
440 
441     Builder.CreateCall(Function, RootArgs);
442 
443     return true;
444   }
445 
446   /* Expand a pass-by-value kernel.
447    */
ExpandKernel(llvm::Function * Function,uint32_t Signature)448   bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
449     bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
450     ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
451 
452     // TODO: Refactor this to share functionality with ExpandFunction.
453     llvm::DataLayout DL(Module);
454 
455     llvm::Function *ExpandedFunction =
456       createEmptyExpandedFunction(Function->getName());
457 
458     /*
459      * Extract the expanded function's parameters.  It is guaranteed by
460      * createEmptyExpandedFunction that there will be five parameters.
461      */
462 
463     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
464 
465     llvm::Function::arg_iterator ExpandedFunctionArgIter =
466       ExpandedFunction->arg_begin();
467 
468     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
469     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
470     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
471     llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
472     llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
473 
474     // Construct the actual function body.
475     llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
476 
477     // Create TBAA meta-data.
478     llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
479     llvm::MDBuilder MDHelper(*Context);
480 
481     TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
482     TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", TBAARenderScript);
483     TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, TBAAAllocation, 0);
484     TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", TBAARenderScript);
485     TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
486 
487     /*
488      * Collect and construct the arguments for the kernel().
489      *
490      * Note that we load any loop-invariant arguments before entering the Loop.
491      */
492     size_t NumInputs = Function->arg_size();
493 
494     llvm::Value *Y = NULL;
495     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
496       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
497       --NumInputs;
498     }
499 
500     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
501       --NumInputs;
502     }
503 
504     // No usrData parameter on kernels.
505     bccAssert(
506         !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
507 
508     llvm::Function::arg_iterator ArgIter = Function->arg_begin();
509 
510     // Check the return type
511     llvm::Type     *OutTy      = NULL;
512     llvm::Value    *OutStep    = NULL;
513     llvm::LoadInst *OutBasePtr = NULL;
514 
515     bool PassOutByReference = false;
516 
517     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
518       llvm::Type *OutBaseTy = Function->getReturnType();
519 
520       if (OutBaseTy->isVoidTy()) {
521         PassOutByReference = true;
522         OutTy = ArgIter->getType();
523 
524         ArgIter++;
525         --NumInputs;
526       } else {
527         // We don't increment Args, since we are using the actual return type.
528         OutTy = OutBaseTy->getPointerTo();
529       }
530 
531       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
532       OutStep->setName("outstep");
533       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
534       if (gEnableRsTbaa) {
535         OutBasePtr->setMetadata("tbaa", TBAAPointer);
536       }
537     }
538 
539     llvm::SmallVector<llvm::Type*,     8> InTypes;
540     llvm::SmallVector<llvm::Value*,    8> InSteps;
541     llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
542     llvm::SmallVector<bool,            8> InIsStructPointer;
543 
544     if (NumInputs == 1) {
545       llvm::Type *InType = ArgIter->getType();
546 
547       /*
548        * AArch64 calling dictate that structs of sufficient size get passed by
549        * poiter instead of passed by value.  This, combined with the fact that
550        * we don't allow kernels to operate on pointer data means that if we see
551        * a kernel with a pointer parameter we know that it is struct input that
552        * has been promoted.  As such we don't need to convert its type to a
553        * pointer.  Later we will need to know to avoid a load, so we save this
554        * information in InIsStructPointer.
555        */
556       if (!InType->isPointerTy()) {
557         InType = InType->getPointerTo();
558         InIsStructPointer.push_back(false);
559       } else {
560         InIsStructPointer.push_back(true);
561       }
562 
563       llvm::Value *InStep = getStepValue(&DL, InType, Arg_instep);
564 
565       InStep->setName("instep");
566 
567       llvm::Value    *Input     = Builder.CreateStructGEP(Arg_p, 0);
568       llvm::LoadInst *InBasePtr = Builder.CreateLoad(Input, "input_base");
569 
570       if (gEnableRsTbaa) {
571         InBasePtr->setMetadata("tbaa", TBAAPointer);
572       }
573 
574       InTypes.push_back(InType);
575       InSteps.push_back(InStep);
576       InBasePtrs.push_back(InBasePtr);
577 
578     } else if (NumInputs > 1) {
579       llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p, 10);
580       llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember,
581                                                       "inputs_base");
582 
583       llvm::Value    *InStepsMember = Builder.CreateStructGEP(Arg_p, 11);
584       llvm::LoadInst *InStepsBase   = Builder.CreateLoad(InStepsMember,
585                                                          "insteps_base");
586 
587       for (size_t InputIndex = 0; InputIndex < NumInputs;
588            ++InputIndex, ArgIter++) {
589 
590           llvm::Value *IndexVal = Builder.getInt32(InputIndex);
591 
592           llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
593           llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
594                                                           "instep_addr");
595 
596           llvm::Type *InType = ArgIter->getType();
597 
598           /*
599          * AArch64 calling dictate that structs of sufficient size get passed by
600          * poiter instead of passed by value.  This, combined with the fact that
601          * we don't allow kernels to operate on pointer data means that if we
602          * see a kernel with a pointer parameter we know that it is struct input
603          * that has been promoted.  As such we don't need to convert its type to
604          * a pointer.  Later we will need to know to avoid a load, so we save
605          * this information in InIsStructPointer.
606          */
607           if (!InType->isPointerTy()) {
608             InType = InType->getPointerTo();
609             InIsStructPointer.push_back(false);
610           } else {
611             InIsStructPointer.push_back(true);
612           }
613 
614           llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
615 
616           InStep->setName("instep");
617 
618           llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
619           llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
620                                                          "input_base");
621 
622           if (gEnableRsTbaa) {
623             InBasePtr->setMetadata("tbaa", TBAAPointer);
624           }
625 
626           InTypes.push_back(InType);
627           InSteps.push_back(InStep);
628           InBasePtrs.push_back(InBasePtr);
629       }
630     }
631 
632     llvm::PHINode *IV;
633     createLoop(Builder, Arg_x1, Arg_x2, &IV);
634 
635     // Populate the actual call to kernel().
636     llvm::SmallVector<llvm::Value*, 8> RootArgs;
637 
638     // Calculate the current input and output pointers
639     //
640     //
641     // We always calculate the input/output pointers with a GEP operating on i8
642     // values combined with a multiplication and only cast at the very end to
643     // OutTy.  This is to account for dynamic stepping sizes when the value
644     // isn't apparent at compile time.  In the (very common) case when we know
645     // the step size at compile time, due to haveing complete type information
646     // this multiplication will optmized out and produces code equivalent to a
647     // a GEP on a pointer of the correct type.
648 
649     // Output
650 
651     llvm::Value *OutPtr = NULL;
652     if (OutBasePtr) {
653       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
654 
655       OutOffset = Builder.CreateMul(OutOffset, OutStep);
656       OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
657       OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);
658 
659       if (PassOutByReference) {
660         RootArgs.push_back(OutPtr);
661       }
662     }
663 
664     // Inputs
665 
666     if (NumInputs > 0) {
667       llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
668 
669       for (size_t Index = 0; Index < NumInputs; ++Index) {
670         llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
671         llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);
672 
673         InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);
674 
675         llvm::Value *Input;
676 
677         if (InIsStructPointer[Index]) {
678           Input = InPtr;
679 
680         } else {
681           llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
682 
683           if (gEnableRsTbaa) {
684             InputLoad->setMetadata("tbaa", TBAAAllocation);
685           }
686 
687           Input = InputLoad;
688         }
689 
690         RootArgs.push_back(Input);
691       }
692     }
693 
694     llvm::Value *X = IV;
695     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
696       RootArgs.push_back(X);
697     }
698 
699     if (Y) {
700       RootArgs.push_back(Y);
701     }
702 
703     llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
704 
705     if (OutPtr && !PassOutByReference) {
706       llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
707       if (gEnableRsTbaa) {
708         Store->setMetadata("tbaa", TBAAAllocation);
709       }
710     }
711 
712     return true;
713   }
714 
715   /// @brief Checks if pointers to allocation internals are exposed
716   ///
717   /// This function verifies if through the parameters passed to the kernel
718   /// or through calls to the runtime library the script gains access to
719   /// pointers pointing to data within a RenderScript Allocation.
720   /// If we know we control all loads from and stores to data within
721   /// RenderScript allocations and if we know the run-time internal accesses
722   /// are all annotated with RenderScript TBAA metadata, only then we
723   /// can safely use TBAA to distinguish between generic and from-allocation
724   /// pointers.
allocPointersExposed(llvm::Module & Module)725   bool allocPointersExposed(llvm::Module &Module) {
726     // Old style kernel function can expose pointers to elements within
727     // allocations.
728     // TODO: Extend analysis to allow simple cases of old-style kernels.
729     for (size_t i = 0; i < mExportForEachCount; ++i) {
730       const char *Name = mExportForEachNameList[i];
731       uint32_t Signature = mExportForEachSignatureList[i];
732       if (Module.getFunction(Name) &&
733           !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
734         return true;
735       }
736     }
737 
738     // Check for library functions that expose a pointer to an Allocation or
739     // that are not yet annotated with RenderScript-specific tbaa information.
740     static std::vector<std::string> Funcs;
741 
742     // rsGetElementAt(...)
743     Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
744     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
745     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
746     // rsSetElementAt()
747     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
748     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
749     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
750     // rsGetElementAtYuv_uchar_Y()
751     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
752     // rsGetElementAtYuv_uchar_U()
753     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
754     // rsGetElementAtYuv_uchar_V()
755     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
756 
757     for (std::vector<std::string>::iterator FI = Funcs.begin(),
758                                             FE = Funcs.end();
759          FI != FE; ++FI) {
760       llvm::Function *Function = Module.getFunction(*FI);
761 
762       if (!Function) {
763         ALOGE("Missing run-time function '%s'", FI->c_str());
764         return true;
765       }
766 
767       if (Function->getNumUses() > 0) {
768         return true;
769       }
770     }
771 
772     return false;
773   }
774 
775   /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
776   ///
777   /// The TBAA metadata used to annotate loads/stores from RenderScript
778   /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
779   /// root node. LLVM does assume may-alias for all nodes in unrelated alias
780   /// analysis trees. This function makes the RenderScript TBAA a subtree of the
781   /// normal C/C++ TBAA tree aside of normal C/C++ types. With the connected trees
782   /// every access to an Allocation is resolved to must-alias if compared to
783   /// a normal C/C++ access.
connectRenderScriptTBAAMetadata(llvm::Module & Module)784   void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
785     llvm::MDBuilder MDHelper(*Context);
786     llvm::MDNode *TBAARenderScript =
787       MDHelper.createTBAARoot("RenderScript TBAA");
788 
789     llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
790     llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
791                                                          TBAARoot);
792 
793     TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
794   }
795 
runOnModule(llvm::Module & Module)796   virtual bool runOnModule(llvm::Module &Module) {
797     bool Changed  = false;
798     this->Module  = &Module;
799     this->Context = &Module.getContext();
800 
801     this->buildTypes();
802 
803     bcinfo::MetadataExtractor me(&Module);
804     if (!me.extract()) {
805       ALOGE("Could not extract metadata from module!");
806       return false;
807     }
808     mExportForEachCount = me.getExportForEachSignatureCount();
809     mExportForEachNameList = me.getExportForEachNameList();
810     mExportForEachSignatureList = me.getExportForEachSignatureList();
811 
812     bool AllocsExposed = allocPointersExposed(Module);
813 
814     for (size_t i = 0; i < mExportForEachCount; ++i) {
815       const char *name = mExportForEachNameList[i];
816       uint32_t signature = mExportForEachSignatureList[i];
817       llvm::Function *kernel = Module.getFunction(name);
818       if (kernel) {
819         if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
820           Changed |= ExpandKernel(kernel, signature);
821           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
822         } else if (kernel->getReturnType()->isVoidTy()) {
823           Changed |= ExpandFunction(kernel, signature);
824           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
825         } else {
826           // There are some graphics root functions that are not
827           // expanded, but that will be called directly. For those
828           // functions, we can not set the linkage to internal.
829         }
830       }
831     }
832 
833     if (gEnableRsTbaa && !AllocsExposed) {
834       connectRenderScriptTBAAMetadata(Module);
835     }
836 
837     return Changed;
838   }
839 
getPassName() const840   virtual const char *getPassName() const {
841     return "ForEach-able Function Expansion";
842   }
843 
844 }; // end RSForEachExpandPass
845 
846 } // end anonymous namespace
847 
848 char RSForEachExpandPass::ID = 0;
849 
850 namespace bcc {
851 
852 llvm::ModulePass *
createRSForEachExpandPass(bool pEnableStepOpt)853 createRSForEachExpandPass(bool pEnableStepOpt){
854   return new RSForEachExpandPass(pEnableStepOpt);
855 }
856 
857 } // end namespace bcc
858