• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "bcc/Assert.h"
18 #include "bcc/Renderscript/RSTransforms.h"
19 
20 #include <cstdlib>
21 
22 #include <llvm/IR/DerivedTypes.h>
23 #include <llvm/IR/Function.h>
24 #include <llvm/IR/Instructions.h>
25 #include <llvm/IR/IRBuilder.h>
26 #include <llvm/IR/MDBuilder.h>
27 #include <llvm/IR/Module.h>
28 #include <llvm/Pass.h>
29 #include <llvm/Support/raw_ostream.h>
30 #include <llvm/IR/DataLayout.h>
31 #include <llvm/IR/Function.h>
32 #include <llvm/IR/Type.h>
33 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
34 
35 #include "bcc/Config/Config.h"
36 #include "bcc/Support/Log.h"
37 
38 #include "bcinfo/MetadataExtractor.h"
39 
40 #define NUM_EXPANDED_FUNCTION_PARAMS 5
41 
42 using namespace bcc;
43 
44 namespace {
45 
46 static const bool gEnableRsTbaa = true;
47 
48 /* RSForEachExpandPass - This pass operates on functions that are able to be
49  * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
50  * ForEach-able function to be invoked over the appropriate data cells of the
51  * input/output allocations (adjusting other relevant parameters as we go). We
52  * support doing this for any ForEach-able compute kernels. The new function
53  * name is the original function name followed by ".expand". Note that we
54  * still generate code for the original function.
55  */
56 class RSForEachExpandPass : public llvm::ModulePass {
57 private:
58   static char ID;
59 
60   llvm::Module *Module;
61   llvm::LLVMContext *Context;
62 
63   /*
64    * Pointer to LLVM type information for the ForEachStubType and the function
65    * signature for expanded kernels.  These must be re-calculated for each
66    * module the pass is run on.
67    */
68   llvm::StructType   *ForEachStubType;
69   llvm::FunctionType *ExpandedFunctionType;
70 
71   uint32_t mExportForEachCount;
72   const char **mExportForEachNameList;
73   const uint32_t *mExportForEachSignatureList;
74 
75   // Turns on optimization of allocation stride values.
76   bool mEnableStepOpt;
77 
getRootSignature(llvm::Function * Function)78   uint32_t getRootSignature(llvm::Function *Function) {
79     const llvm::NamedMDNode *ExportForEachMetadata =
80         Module->getNamedMetadata("#rs_export_foreach");
81 
82     if (!ExportForEachMetadata) {
83       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
84       for (llvm::Function::arg_iterator B = Function->arg_begin(),
85                                         E = Function->arg_end();
86            B != E;
87            ++B) {
88         RootArgTys.push_back(B->getType());
89       }
90 
91       // For pre-ICS bitcode, we may not have signature information. In that
92       // case, we use the size of the RootArgTys to select the number of
93       // arguments.
94       return (1 << RootArgTys.size()) - 1;
95     }
96 
97     if (ExportForEachMetadata->getNumOperands() == 0) {
98       return 0;
99     }
100 
101     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
102 
103     // We only handle the case for legacy root() functions here, so this is
104     // hard-coded to look at only the first such function.
105     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
106     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
107       llvm::Value *SigVal = SigNode->getOperand(0);
108       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
109         llvm::StringRef SigString =
110             static_cast<llvm::MDString*>(SigVal)->getString();
111         uint32_t Signature = 0;
112         if (SigString.getAsInteger(10, Signature)) {
113           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
114           return 0;
115         }
116         return Signature;
117       }
118     }
119 
120     return 0;
121   }
122 
isStepOptSupported(llvm::Type * AllocType)123   bool isStepOptSupported(llvm::Type *AllocType) {
124 
125     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
126     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
127 
128     if (mEnableStepOpt) {
129       return false;
130     }
131 
132     if (AllocType == VoidPtrTy) {
133       return false;
134     }
135 
136     if (!PT) {
137       return false;
138     }
139 
140     // remaining conditions are 64-bit only
141     if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
142       return true;
143     }
144 
145     // coerce suggests an upconverted struct type, which we can't support
146     if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
147       return false;
148     }
149 
150     // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
151     llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
152     llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
153     if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
154       return false;
155     }
156 
157     return true;
158   }
159 
160   // Get the actual value we should use to step through an allocation.
161   //
162   // Normally the value we use to step through an allocation is given to us by
163   // the driver. However, for certain primitive data types, we can derive an
164   // integer constant for the step value. We use this integer constant whenever
165   // possible to allow further compiler optimizations to take place.
166   //
167   // DL - Target Data size/layout information.
168   // T - Type of allocation (should be a pointer).
169   // OrigStep - Original step increment (root.expand() input from driver).
getStepValue(llvm::DataLayout * DL,llvm::Type * AllocType,llvm::Value * OrigStep)170   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
171                             llvm::Value *OrigStep) {
172     bccAssert(DL);
173     bccAssert(AllocType);
174     bccAssert(OrigStep);
175     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
176     if (isStepOptSupported(AllocType)) {
177       llvm::Type *ET = PT->getElementType();
178       uint64_t ETSize = DL->getTypeAllocSize(ET);
179       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
180       return llvm::ConstantInt::get(Int32Ty, ETSize);
181     } else {
182       return OrigStep;
183     }
184   }
185 
186   /// @brief Builds the types required by the pass for the given context.
buildTypes(void)187   void buildTypes(void) {
188     // Create the RsForEachStubParam struct.
189 
190     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
191     llvm::Type *Int32Ty   = llvm::Type::getInt32Ty(*Context);
192     /* Defined in frameworks/base/libs/rs/rs_hal.h:
193      *
194      * struct RsForEachStubParamStruct {
195      *   const void *in;
196      *   void *out;
197      *   const void *usr;
198      *   uint32_t usr_len;
199      *   uint32_t x;
200      *   uint32_t y;
201      *   uint32_t z;
202      *   uint32_t lod;
203      *   enum RsAllocationCubemapFace face;
204      *   uint32_t ar[16];
205      *   const void **ins;
206      *   uint32_t *eStrideIns;
207      * };
208      */
209     llvm::SmallVector<llvm::Type*, 16> StructTypes;
210     StructTypes.push_back(VoidPtrTy);  // const void *in
211     StructTypes.push_back(VoidPtrTy);  // void *out
212     StructTypes.push_back(VoidPtrTy);  // const void *usr
213     StructTypes.push_back(Int32Ty);    // uint32_t usr_len
214     StructTypes.push_back(Int32Ty);    // uint32_t x
215     StructTypes.push_back(Int32Ty);    // uint32_t y
216     StructTypes.push_back(Int32Ty);    // uint32_t z
217     StructTypes.push_back(Int32Ty);    // uint32_t lod
218     StructTypes.push_back(Int32Ty);    // enum RsAllocationCubemapFace
219     StructTypes.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
220 
221     StructTypes.push_back(llvm::PointerType::getUnqual(VoidPtrTy)); // const void **ins
222     StructTypes.push_back(Int32Ty->getPointerTo()); // uint32_t *eStrideIns
223 
224     ForEachStubType =
225       llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
226 
227     // Create the function type for expanded kernels.
228 
229     llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
230 
231     llvm::SmallVector<llvm::Type*, 8> ParamTypes;
232     ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
233     ParamTypes.push_back(Int32Ty);          // uint32_t x1
234     ParamTypes.push_back(Int32Ty);          // uint32_t x2
235     ParamTypes.push_back(Int32Ty);          // uint32_t instep
236     ParamTypes.push_back(Int32Ty);          // uint32_t outstep
237 
238     ExpandedFunctionType = llvm::FunctionType::get(llvm::Type::getVoidTy(*Context),
239                                               ParamTypes,
240                                               false);
241   }
242 
243   /// @brief Create skeleton of the expanded function.
244   ///
245   /// This creates a function with the following signature:
246   ///
247   ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
248   ///         uint32_t instep, uint32_t outstep)
249   ///
createEmptyExpandedFunction(llvm::StringRef OldName)250   llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
251     llvm::Function *ExpandedFunction =
252       llvm::Function::Create(ExpandedFunctionType,
253                              llvm::GlobalValue::ExternalLinkage,
254                              OldName + ".expand", Module);
255 
256     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
257 
258     llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
259 
260     (AI++)->setName("p");
261     (AI++)->setName("x1");
262     (AI++)->setName("x2");
263     (AI++)->setName("arg_instep");
264     (AI++)->setName("arg_outstep");
265 
266     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
267                                                        ExpandedFunction);
268     llvm::IRBuilder<> Builder(Begin);
269     Builder.CreateRetVoid();
270 
271     return ExpandedFunction;
272   }
273 
274   /// @brief Create an empty loop
275   ///
276   /// Create a loop of the form:
277   ///
278   /// for (i = LowerBound; i < UpperBound; i++)
279   ///   ;
280   ///
281   /// After the loop has been created, the builder is set such that
282   /// instructions can be added to the loop body.
283   ///
284   /// @param Builder The builder to use to build this loop. The current
285   ///                position of the builder is the position the loop
286   ///                will be inserted.
287   /// @param LowerBound The first value of the loop iterator
288   /// @param UpperBound The maximal value of the loop iterator
289   /// @param LoopIV A reference that will be set to the loop iterator.
290   /// @return The BasicBlock that will be executed after the loop.
createLoop(llvm::IRBuilder<> & Builder,llvm::Value * LowerBound,llvm::Value * UpperBound,llvm::PHINode ** LoopIV)291   llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
292                                llvm::Value *LowerBound,
293                                llvm::Value *UpperBound,
294                                llvm::PHINode **LoopIV) {
295     assert(LowerBound->getType() == UpperBound->getType());
296 
297     llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
298     llvm::Value *Cond, *IVNext;
299     llvm::PHINode *IV;
300 
301     CondBB = Builder.GetInsertBlock();
302     AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
303     HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
304 
305     // if (LowerBound < Upperbound)
306     //   goto LoopHeader
307     // else
308     //   goto AfterBB
309     CondBB->getTerminator()->eraseFromParent();
310     Builder.SetInsertPoint(CondBB);
311     Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
312     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
313 
314     // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
315     // iv.next = iv + 1
316     // if (iv.next < Upperbound)
317     //   goto LoopHeader
318     // else
319     //   goto AfterBB
320     Builder.SetInsertPoint(HeaderBB);
321     IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
322     IV->addIncoming(LowerBound, CondBB);
323     IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
324     IV->addIncoming(IVNext, HeaderBB);
325     Cond = Builder.CreateICmpULT(IVNext, UpperBound);
326     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
327     AfterBB->setName("Exit");
328     Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
329     *LoopIV = IV;
330     return AfterBB;
331   }
332 
333 public:
RSForEachExpandPass(bool pEnableStepOpt)334   RSForEachExpandPass(bool pEnableStepOpt)
335       : ModulePass(ID), Module(NULL), Context(NULL),
336         mEnableStepOpt(pEnableStepOpt) {
337 
338   }
339 
340   /* Performs the actual optimization on a selected function. On success, the
341    * Module will contain a new function of the name "<NAME>.expand" that
342    * invokes <NAME>() in a loop with the appropriate parameters.
343    */
ExpandFunction(llvm::Function * Function,uint32_t Signature)344   bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
345     ALOGV("Expanding ForEach-able Function %s",
346           Function->getName().str().c_str());
347 
348     if (!Signature) {
349       Signature = getRootSignature(Function);
350       if (!Signature) {
351         // We couldn't determine how to expand this function based on its
352         // function signature.
353         return false;
354       }
355     }
356 
357     llvm::DataLayout DL(Module);
358 
359     llvm::Function *ExpandedFunction =
360       createEmptyExpandedFunction(Function->getName());
361 
362     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
363 
364     /*
365      * Extract the expanded function's parameters.  It is guaranteed by
366      * createEmptyExpandedFunction that there will be five parameters.
367      */
368     llvm::Function::arg_iterator ExpandedFunctionArgIter =
369       ExpandedFunction->arg_begin();
370 
371     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
372     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
373     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
374     llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
375     llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
376 
377     llvm::Value *InStep  = NULL;
378     llvm::Value *OutStep = NULL;
379 
380     // Construct the actual function body.
381     llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
382 
383     // Collect and construct the arguments for the kernel().
384     // Note that we load any loop-invariant arguments before entering the Loop.
385     llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
386 
387     llvm::Type *InTy = NULL;
388     llvm::Value *InBasePtr = NULL;
389     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
390       InTy = (FunctionArgIter++)->getType();
391       InStep = getStepValue(&DL, InTy, Arg_instep);
392       InStep->setName("instep");
393       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
394     }
395 
396     llvm::Type *OutTy = NULL;
397     llvm::Value *OutBasePtr = NULL;
398     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
399       OutTy = (FunctionArgIter++)->getType();
400       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
401       OutStep->setName("outstep");
402       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
403     }
404 
405     llvm::Value *UsrData = NULL;
406     if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
407       llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
408       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
409           Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
410       UsrData->setName("UsrData");
411     }
412 
413     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
414       FunctionArgIter++;
415     }
416 
417     llvm::Value *Y = NULL;
418     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
419       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
420       FunctionArgIter++;
421     }
422 
423     bccAssert(FunctionArgIter == Function->arg_end());
424 
425     llvm::PHINode *IV;
426     createLoop(Builder, Arg_x1, Arg_x2, &IV);
427 
428     // Populate the actual call to kernel().
429     llvm::SmallVector<llvm::Value*, 8> RootArgs;
430 
431     llvm::Value *InPtr  = NULL;
432     llvm::Value *OutPtr = NULL;
433 
434     // Calculate the current input and output pointers
435     //
436     // We always calculate the input/output pointers with a GEP operating on i8
437     // values and only cast at the very end to OutTy. This is because the step
438     // between two values is given in bytes.
439     //
440     // TODO: We could further optimize the output by using a GEP operation of
441     // type 'OutTy' in cases where the element type of the allocation allows.
442     if (OutBasePtr) {
443       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
444       OutOffset = Builder.CreateMul(OutOffset, OutStep);
445       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
446       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
447     }
448 
449     if (InBasePtr) {
450       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
451       InOffset = Builder.CreateMul(InOffset, InStep);
452       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
453       InPtr = Builder.CreatePointerCast(InPtr, InTy);
454     }
455 
456     if (InPtr) {
457       RootArgs.push_back(InPtr);
458     }
459 
460     if (OutPtr) {
461       RootArgs.push_back(OutPtr);
462     }
463 
464     if (UsrData) {
465       RootArgs.push_back(UsrData);
466     }
467 
468     llvm::Value *X = IV;
469     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
470       RootArgs.push_back(X);
471     }
472 
473     if (Y) {
474       RootArgs.push_back(Y);
475     }
476 
477     Builder.CreateCall(Function, RootArgs);
478 
479     return true;
480   }
481 
482   /* Expand a pass-by-value kernel.
483    */
ExpandKernel(llvm::Function * Function,uint32_t Signature)484   bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
485     bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
486     ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
487 
488     // TODO: Refactor this to share functionality with ExpandFunction.
489     llvm::DataLayout DL(Module);
490 
491     llvm::Function *ExpandedFunction =
492       createEmptyExpandedFunction(Function->getName());
493 
494     /*
495      * Extract the expanded function's parameters.  It is guaranteed by
496      * createEmptyExpandedFunction that there will be five parameters.
497      */
498 
499     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
500 
501     llvm::Function::arg_iterator ExpandedFunctionArgIter =
502       ExpandedFunction->arg_begin();
503 
504     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
505     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
506     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
507     llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
508     llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
509 
510     // Construct the actual function body.
511     llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
512 
513     // Create TBAA meta-data.
514     llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
515     llvm::MDBuilder MDHelper(*Context);
516 
517     TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
518     TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", TBAARenderScript);
519     TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, TBAAAllocation, 0);
520     TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", TBAARenderScript);
521     TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
522 
523     /*
524      * Collect and construct the arguments for the kernel().
525      *
526      * Note that we load any loop-invariant arguments before entering the Loop.
527      */
528     size_t NumInputs = Function->arg_size();
529 
530     llvm::Value *Y = NULL;
531     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
532       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
533       --NumInputs;
534     }
535 
536     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
537       --NumInputs;
538     }
539 
540     // No usrData parameter on kernels.
541     bccAssert(
542         !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
543 
544     llvm::Function::arg_iterator ArgIter = Function->arg_begin();
545 
546     // Check the return type
547     llvm::Type     *OutTy      = NULL;
548     llvm::Value    *OutStep    = NULL;
549     llvm::LoadInst *OutBasePtr = NULL;
550 
551     bool PassOutByReference = false;
552 
553     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
554       llvm::Type *OutBaseTy = Function->getReturnType();
555 
556       if (OutBaseTy->isVoidTy()) {
557         PassOutByReference = true;
558         OutTy = ArgIter->getType();
559 
560         ArgIter++;
561         --NumInputs;
562       } else {
563         // We don't increment Args, since we are using the actual return type.
564         OutTy = OutBaseTy->getPointerTo();
565       }
566 
567       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
568       OutStep->setName("outstep");
569       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
570       if (gEnableRsTbaa) {
571         OutBasePtr->setMetadata("tbaa", TBAAPointer);
572       }
573     }
574 
575     llvm::SmallVector<llvm::Type*,     8> InTypes;
576     llvm::SmallVector<llvm::Value*,    8> InSteps;
577     llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
578     llvm::SmallVector<bool,            8> InIsStructPointer;
579 
580     if (NumInputs == 1) {
581       llvm::Type *InType = ArgIter->getType();
582 
583       /*
584        * AArch64 calling dictate that structs of sufficient size get passed by
585        * poiter instead of passed by value.  This, combined with the fact that
586        * we don't allow kernels to operate on pointer data means that if we see
587        * a kernel with a pointer parameter we know that it is struct input that
588        * has been promoted.  As such we don't need to convert its type to a
589        * pointer.  Later we will need to know to avoid a load, so we save this
590        * information in InIsStructPointer.
591        */
592       if (!InType->isPointerTy()) {
593         InType = InType->getPointerTo();
594         InIsStructPointer.push_back(false);
595       } else {
596         InIsStructPointer.push_back(true);
597       }
598 
599       llvm::Value *InStep = getStepValue(&DL, InType, Arg_instep);
600 
601       InStep->setName("instep");
602 
603       llvm::Value    *Input     = Builder.CreateStructGEP(Arg_p, 0);
604       llvm::LoadInst *InBasePtr = Builder.CreateLoad(Input, "input_base");
605 
606       if (gEnableRsTbaa) {
607         InBasePtr->setMetadata("tbaa", TBAAPointer);
608       }
609 
610       InTypes.push_back(InType);
611       InSteps.push_back(InStep);
612       InBasePtrs.push_back(InBasePtr);
613 
614     } else if (NumInputs > 1) {
615       llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p, 10);
616       llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember,
617                                                       "inputs_base");
618 
619       llvm::Value    *InStepsMember = Builder.CreateStructGEP(Arg_p, 11);
620       llvm::LoadInst *InStepsBase   = Builder.CreateLoad(InStepsMember,
621                                                          "insteps_base");
622 
623       for (size_t InputIndex = 0; InputIndex < NumInputs;
624            ++InputIndex, ArgIter++) {
625 
626           llvm::Value *IndexVal = Builder.getInt32(InputIndex);
627 
628           llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
629           llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
630                                                           "instep_addr");
631 
632           llvm::Type *InType = ArgIter->getType();
633 
634           /*
635          * AArch64 calling dictate that structs of sufficient size get passed by
636          * poiter instead of passed by value.  This, combined with the fact that
637          * we don't allow kernels to operate on pointer data means that if we
638          * see a kernel with a pointer parameter we know that it is struct input
639          * that has been promoted.  As such we don't need to convert its type to
640          * a pointer.  Later we will need to know to avoid a load, so we save
641          * this information in InIsStructPointer.
642          */
643           if (!InType->isPointerTy()) {
644             InType = InType->getPointerTo();
645             InIsStructPointer.push_back(false);
646           } else {
647             InIsStructPointer.push_back(true);
648           }
649 
650           llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
651 
652           InStep->setName("instep");
653 
654           llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
655           llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
656                                                          "input_base");
657 
658           if (gEnableRsTbaa) {
659             InBasePtr->setMetadata("tbaa", TBAAPointer);
660           }
661 
662           InTypes.push_back(InType);
663           InSteps.push_back(InStep);
664           InBasePtrs.push_back(InBasePtr);
665       }
666     }
667 
668     llvm::PHINode *IV;
669     createLoop(Builder, Arg_x1, Arg_x2, &IV);
670 
671     // Populate the actual call to kernel().
672     llvm::SmallVector<llvm::Value*, 8> RootArgs;
673 
674     // Calculate the current input and output pointers
675     //
676     //
677     // We always calculate the input/output pointers with a GEP operating on i8
678     // values combined with a multiplication and only cast at the very end to
679     // OutTy.  This is to account for dynamic stepping sizes when the value
680     // isn't apparent at compile time.  In the (very common) case when we know
681     // the step size at compile time, due to haveing complete type information
682     // this multiplication will optmized out and produces code equivalent to a
683     // a GEP on a pointer of the correct type.
684 
685     // Output
686 
687     llvm::Value *OutPtr = NULL;
688     if (OutBasePtr) {
689       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
690 
691       OutOffset = Builder.CreateMul(OutOffset, OutStep);
692       OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
693       OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);
694 
695       if (PassOutByReference) {
696         RootArgs.push_back(OutPtr);
697       }
698     }
699 
700     // Inputs
701 
702     if (NumInputs > 0) {
703       llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
704 
705       for (size_t Index = 0; Index < NumInputs; ++Index) {
706         llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
707         llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);
708 
709         InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);
710 
711         llvm::Value *Input;
712 
713         if (InIsStructPointer[Index]) {
714           Input = InPtr;
715 
716         } else {
717           llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
718 
719           if (gEnableRsTbaa) {
720             InputLoad->setMetadata("tbaa", TBAAAllocation);
721           }
722 
723           Input = InputLoad;
724         }
725 
726         RootArgs.push_back(Input);
727       }
728     }
729 
730     llvm::Value *X = IV;
731     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
732       RootArgs.push_back(X);
733     }
734 
735     if (Y) {
736       RootArgs.push_back(Y);
737     }
738 
739     llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
740 
741     if (OutPtr && !PassOutByReference) {
742       llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
743       if (gEnableRsTbaa) {
744         Store->setMetadata("tbaa", TBAAAllocation);
745       }
746     }
747 
748     return true;
749   }
750 
751   /// @brief Checks if pointers to allocation internals are exposed
752   ///
753   /// This function verifies if through the parameters passed to the kernel
754   /// or through calls to the runtime library the script gains access to
755   /// pointers pointing to data within a RenderScript Allocation.
756   /// If we know we control all loads from and stores to data within
757   /// RenderScript allocations and if we know the run-time internal accesses
758   /// are all annotated with RenderScript TBAA metadata, only then we
759   /// can safely use TBAA to distinguish between generic and from-allocation
760   /// pointers.
allocPointersExposed(llvm::Module & Module)761   bool allocPointersExposed(llvm::Module &Module) {
762     // Old style kernel function can expose pointers to elements within
763     // allocations.
764     // TODO: Extend analysis to allow simple cases of old-style kernels.
765     for (size_t i = 0; i < mExportForEachCount; ++i) {
766       const char *Name = mExportForEachNameList[i];
767       uint32_t Signature = mExportForEachSignatureList[i];
768       if (Module.getFunction(Name) &&
769           !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
770         return true;
771       }
772     }
773 
774     // Check for library functions that expose a pointer to an Allocation or
775     // that are not yet annotated with RenderScript-specific tbaa information.
776     static std::vector<std::string> Funcs;
777 
778     // rsGetElementAt(...)
779     Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
780     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
781     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
782     // rsSetElementAt()
783     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
784     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
785     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
786     // rsGetElementAtYuv_uchar_Y()
787     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
788     // rsGetElementAtYuv_uchar_U()
789     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
790     // rsGetElementAtYuv_uchar_V()
791     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
792 
793     for (std::vector<std::string>::iterator FI = Funcs.begin(),
794                                             FE = Funcs.end();
795          FI != FE; ++FI) {
796       llvm::Function *Function = Module.getFunction(*FI);
797 
798       if (!Function) {
799         ALOGE("Missing run-time function '%s'", FI->c_str());
800         return true;
801       }
802 
803       if (Function->getNumUses() > 0) {
804         return true;
805       }
806     }
807 
808     return false;
809   }
810 
811   /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
812   ///
813   /// The TBAA metadata used to annotate loads/stores from RenderScript
814   /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
815   /// root node. LLVM does assume may-alias for all nodes in unrelated alias
816   /// analysis trees. This function makes the RenderScript TBAA a subtree of the
817   /// normal C/C++ TBAA tree aside of normal C/C++ types. With the connected trees
818   /// every access to an Allocation is resolved to must-alias if compared to
819   /// a normal C/C++ access.
connectRenderScriptTBAAMetadata(llvm::Module & Module)820   void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
821     llvm::MDBuilder MDHelper(*Context);
822     llvm::MDNode *TBAARenderScript =
823       MDHelper.createTBAARoot("RenderScript TBAA");
824 
825     llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
826     llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
827                                                          TBAARoot);
828 
829     TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
830   }
831 
runOnModule(llvm::Module & Module)832   virtual bool runOnModule(llvm::Module &Module) {
833     bool Changed  = false;
834     this->Module  = &Module;
835     this->Context = &Module.getContext();
836 
837     this->buildTypes();
838 
839     bcinfo::MetadataExtractor me(&Module);
840     if (!me.extract()) {
841       ALOGE("Could not extract metadata from module!");
842       return false;
843     }
844     mExportForEachCount = me.getExportForEachSignatureCount();
845     mExportForEachNameList = me.getExportForEachNameList();
846     mExportForEachSignatureList = me.getExportForEachSignatureList();
847 
848     bool AllocsExposed = allocPointersExposed(Module);
849 
850     for (size_t i = 0; i < mExportForEachCount; ++i) {
851       const char *name = mExportForEachNameList[i];
852       uint32_t signature = mExportForEachSignatureList[i];
853       llvm::Function *kernel = Module.getFunction(name);
854       if (kernel) {
855         if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
856           Changed |= ExpandKernel(kernel, signature);
857           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
858         } else if (kernel->getReturnType()->isVoidTy()) {
859           Changed |= ExpandFunction(kernel, signature);
860           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
861         } else {
862           // There are some graphics root functions that are not
863           // expanded, but that will be called directly. For those
864           // functions, we can not set the linkage to internal.
865         }
866       }
867     }
868 
869     if (gEnableRsTbaa && !AllocsExposed) {
870       connectRenderScriptTBAAMetadata(Module);
871     }
872 
873     return Changed;
874   }
875 
getPassName() const876   virtual const char *getPassName() const {
877     return "ForEach-able Function Expansion";
878   }
879 
880 }; // end RSForEachExpandPass
881 
882 } // end anonymous namespace
883 
884 char RSForEachExpandPass::ID = 0;
885 
886 namespace bcc {
887 
888 llvm::ModulePass *
createRSForEachExpandPass(bool pEnableStepOpt)889 createRSForEachExpandPass(bool pEnableStepOpt){
890   return new RSForEachExpandPass(pEnableStepOpt);
891 }
892 
893 } // end namespace bcc
894