• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "bcc/Assert.h"
18 #include "bcc/Renderscript/RSTransforms.h"
19 
20 #include <cstdlib>
21 
22 #include <llvm/IR/DerivedTypes.h>
23 #include <llvm/IR/Function.h>
24 #include <llvm/IR/Instructions.h>
25 #include <llvm/IR/IRBuilder.h>
26 #include <llvm/IR/MDBuilder.h>
27 #include <llvm/IR/Module.h>
28 #include <llvm/Pass.h>
29 #include <llvm/Support/raw_ostream.h>
30 #include <llvm/IR/DataLayout.h>
31 #include <llvm/IR/Function.h>
32 #include <llvm/IR/Type.h>
33 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
34 
35 #include "bcc/Config/Config.h"
36 #include "bcc/Renderscript/RSInfo.h"
37 #include "bcc/Support/Log.h"
38 
39 #include "bcinfo/MetadataExtractor.h"
40 
41 using namespace bcc;
42 
43 namespace {
44 
45 /* RSForEachExpandPass - This pass operates on functions that are able to be
46  * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
47  * ForEach-able function to be invoked over the appropriate data cells of the
48  * input/output allocations (adjusting other relevant parameters as we go). We
49  * support doing this for any ForEach-able compute kernels. The new function
50  * name is the original function name followed by ".expand". Note that we
51  * still generate code for the original function.
52  */
53 class RSForEachExpandPass : public llvm::ModulePass {
54 private:
55   static char ID;
56 
57   llvm::Module *M;
58   llvm::LLVMContext *C;
59 
60   const RSInfo::ExportForeachFuncListTy &mFuncs;
61 
62   // Turns on optimization of allocation stride values.
63   bool mEnableStepOpt;
64 
getRootSignature(llvm::Function * F)65   uint32_t getRootSignature(llvm::Function *F) {
66     const llvm::NamedMDNode *ExportForEachMetadata =
67         M->getNamedMetadata("#rs_export_foreach");
68 
69     if (!ExportForEachMetadata) {
70       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
71       for (llvm::Function::arg_iterator B = F->arg_begin(),
72                                         E = F->arg_end();
73            B != E;
74            ++B) {
75         RootArgTys.push_back(B->getType());
76       }
77 
78       // For pre-ICS bitcode, we may not have signature information. In that
79       // case, we use the size of the RootArgTys to select the number of
80       // arguments.
81       return (1 << RootArgTys.size()) - 1;
82     }
83 
84     if (ExportForEachMetadata->getNumOperands() == 0) {
85       return 0;
86     }
87 
88     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
89 
90     // We only handle the case for legacy root() functions here, so this is
91     // hard-coded to look at only the first such function.
92     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
93     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
94       llvm::Value *SigVal = SigNode->getOperand(0);
95       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
96         llvm::StringRef SigString =
97             static_cast<llvm::MDString*>(SigVal)->getString();
98         uint32_t Signature = 0;
99         if (SigString.getAsInteger(10, Signature)) {
100           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
101           return 0;
102         }
103         return Signature;
104       }
105     }
106 
107     return 0;
108   }
109 
110   // Get the actual value we should use to step through an allocation.
111   //
112   // Normally the value we use to step through an allocation is given to us by
113   // the driver. However, for certain primitive data types, we can derive an
114   // integer constant for the step value. We use this integer constant whenever
115   // possible to allow further compiler optimizations to take place.
116   //
117   // DL - Target Data size/layout information.
118   // T - Type of allocation (should be a pointer).
119   // OrigStep - Original step increment (root.expand() input from driver).
getStepValue(llvm::DataLayout * DL,llvm::Type * T,llvm::Value * OrigStep)120   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
121                             llvm::Value *OrigStep) {
122     bccAssert(DL);
123     bccAssert(T);
124     bccAssert(OrigStep);
125     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
126     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
127     if (mEnableStepOpt && T != VoidPtrTy && PT) {
128       llvm::Type *ET = PT->getElementType();
129       uint64_t ETSize = DL->getTypeAllocSize(ET);
130       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
131       return llvm::ConstantInt::get(Int32Ty, ETSize);
132     } else {
133       return OrigStep;
134     }
135   }
136 
137   /// @brief Returns the type of the ForEach stub parameter structure.
138   ///
139   /// Renderscript uses a single structure in which all parameters are passed
140   /// to keep the signature of the expanded function independent of the
141   /// parameters passed to it.
getForeachStubTy()142   llvm::Type *getForeachStubTy() {
143     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
144     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
145     llvm::Type *SizeTy = Int32Ty;
146     /* Defined in frameworks/base/libs/rs/rs_hal.h:
147      *
148      * struct RsForEachStubParamStruct {
149      *   const void *in;
150      *   void *out;
151      *   const void *usr;
152      *   size_t usr_len;
153      *   uint32_t x;
154      *   uint32_t y;
155      *   uint32_t z;
156      *   uint32_t lod;
157      *   enum RsAllocationCubemapFace face;
158      *   uint32_t ar[16];
159      * };
160      */
161     llvm::SmallVector<llvm::Type*, 9> StructTys;
162     StructTys.push_back(VoidPtrTy);  // const void *in
163     StructTys.push_back(VoidPtrTy);  // void *out
164     StructTys.push_back(VoidPtrTy);  // const void *usr
165     StructTys.push_back(SizeTy);     // size_t usr_len
166     StructTys.push_back(Int32Ty);    // uint32_t x
167     StructTys.push_back(Int32Ty);    // uint32_t y
168     StructTys.push_back(Int32Ty);    // uint32_t z
169     StructTys.push_back(Int32Ty);    // uint32_t lod
170     StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
171     StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
172 
173     return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
174   }
175 
176   /// @brief Create skeleton of the expanded function.
177   ///
178   /// This creates a function with the following signature:
179   ///
180   ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
181   ///         uint32_t instep, uint32_t outstep)
182   ///
createEmptyExpandedFunction(llvm::StringRef OldName)183   llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
184     llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
185     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
186 
187     llvm::SmallVector<llvm::Type*, 8> ParamTys;
188     ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
189     ParamTys.push_back(Int32Ty);           // uint32_t x1
190     ParamTys.push_back(Int32Ty);           // uint32_t x2
191     ParamTys.push_back(Int32Ty);           // uint32_t instep
192     ParamTys.push_back(Int32Ty);           // uint32_t outstep
193 
194     llvm::FunctionType *FT =
195         llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
196     llvm::Function *F =
197         llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
198                                OldName + ".expand", M);
199 
200     llvm::Function::arg_iterator AI = F->arg_begin();
201 
202     AI->setName("p");
203     AI++;
204     AI->setName("x1");
205     AI++;
206     AI->setName("x2");
207     AI++;
208     AI->setName("arg_instep");
209     AI++;
210     AI->setName("arg_outstep");
211     AI++;
212 
213     assert(AI == F->arg_end());
214 
215     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*C, "Begin", F);
216     llvm::IRBuilder<> Builder(Begin);
217     Builder.CreateRetVoid();
218 
219     return F;
220   }
221 
222   /// @brief Create an empty loop
223   ///
224   /// Create a loop of the form:
225   ///
226   /// for (i = LowerBound; i < UpperBound; i++)
227   ///   ;
228   ///
229   /// After the loop has been created, the builder is set such that
230   /// instructions can be added to the loop body.
231   ///
232   /// @param Builder The builder to use to build this loop. The current
233   ///                position of the builder is the position the loop
234   ///                will be inserted.
235   /// @param LowerBound The first value of the loop iterator
236   /// @param UpperBound The maximal value of the loop iterator
237   /// @param LoopIV A reference that will be set to the loop iterator.
238   /// @return The BasicBlock that will be executed after the loop.
createLoop(llvm::IRBuilder<> & Builder,llvm::Value * LowerBound,llvm::Value * UpperBound,llvm::PHINode ** LoopIV)239   llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
240                                llvm::Value *LowerBound,
241                                llvm::Value *UpperBound,
242                                llvm::PHINode **LoopIV) {
243     assert(LowerBound->getType() == UpperBound->getType());
244 
245     llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
246     llvm::Value *Cond, *IVNext;
247     llvm::PHINode *IV;
248 
249     CondBB = Builder.GetInsertBlock();
250     AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
251     HeaderBB = llvm::BasicBlock::Create(*C, "Loop", CondBB->getParent());
252 
253     // if (LowerBound < Upperbound)
254     //   goto LoopHeader
255     // else
256     //   goto AfterBB
257     CondBB->getTerminator()->eraseFromParent();
258     Builder.SetInsertPoint(CondBB);
259     Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
260     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
261 
262     // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
263     // iv.next = iv + 1
264     // if (iv.next < Upperbound)
265     //   goto LoopHeader
266     // else
267     //   goto AfterBB
268     Builder.SetInsertPoint(HeaderBB);
269     IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
270     IV->addIncoming(LowerBound, CondBB);
271     IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
272     IV->addIncoming(IVNext, HeaderBB);
273     Cond = Builder.CreateICmpULT(IVNext, UpperBound);
274     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
275     AfterBB->setName("Exit");
276     Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
277     *LoopIV = IV;
278     return AfterBB;
279   }
280 
281 public:
RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy & pForeachFuncs,bool pEnableStepOpt)282   RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
283                       bool pEnableStepOpt)
284       : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
285         mEnableStepOpt(pEnableStepOpt) {
286   }
287 
288   /* Performs the actual optimization on a selected function. On success, the
289    * Module will contain a new function of the name "<NAME>.expand" that
290    * invokes <NAME>() in a loop with the appropriate parameters.
291    */
ExpandFunction(llvm::Function * F,uint32_t Signature)292   bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
293     ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
294 
295     if (!Signature) {
296       Signature = getRootSignature(F);
297       if (!Signature) {
298         // We couldn't determine how to expand this function based on its
299         // function signature.
300         return false;
301       }
302     }
303 
304     llvm::DataLayout DL(M);
305 
306     llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
307 
308     // Create and name the actual arguments to this expanded function.
309     llvm::SmallVector<llvm::Argument*, 8> ArgVec;
310     for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
311                                       E = ExpandedFunc->arg_end();
312          B != E;
313          ++B) {
314       ArgVec.push_back(B);
315     }
316 
317     if (ArgVec.size() != 5) {
318       ALOGE("Incorrect number of arguments to function: %zu",
319             ArgVec.size());
320       return false;
321     }
322     llvm::Value *Arg_p = ArgVec[0];
323     llvm::Value *Arg_x1 = ArgVec[1];
324     llvm::Value *Arg_x2 = ArgVec[2];
325     llvm::Value *Arg_instep = ArgVec[3];
326     llvm::Value *Arg_outstep = ArgVec[4];
327 
328     llvm::Value *InStep = NULL;
329     llvm::Value *OutStep = NULL;
330 
331     // Construct the actual function body.
332     llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
333 
334     // Collect and construct the arguments for the kernel().
335     // Note that we load any loop-invariant arguments before entering the Loop.
336     llvm::Function::arg_iterator Args = F->arg_begin();
337 
338     llvm::Type *InTy = NULL;
339     llvm::Value *InBasePtr = NULL;
340     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
341       InTy = Args->getType();
342       InStep = getStepValue(&DL, InTy, Arg_instep);
343       InStep->setName("instep");
344       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
345       Args++;
346     }
347 
348     llvm::Type *OutTy = NULL;
349     llvm::Value *OutBasePtr = NULL;
350     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
351       OutTy = Args->getType();
352       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
353       OutStep->setName("outstep");
354       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
355       Args++;
356     }
357 
358     llvm::Value *UsrData = NULL;
359     if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
360       llvm::Type *UsrDataTy = Args->getType();
361       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
362           Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
363       UsrData->setName("UsrData");
364       Args++;
365     }
366 
367     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
368       Args++;
369     }
370 
371     llvm::Value *Y = NULL;
372     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
373       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
374       Args++;
375     }
376 
377     bccAssert(Args == F->arg_end());
378 
379     llvm::PHINode *IV;
380     createLoop(Builder, Arg_x1, Arg_x2, &IV);
381 
382     // Populate the actual call to kernel().
383     llvm::SmallVector<llvm::Value*, 8> RootArgs;
384 
385     llvm::Value *InPtr = NULL;
386     llvm::Value *OutPtr = NULL;
387 
388     // Calculate the current input and output pointers
389     //
390     // We always calculate the input/output pointers with a GEP operating on i8
391     // values and only cast at the very end to OutTy. This is because the step
392     // between two values is given in bytes.
393     //
394     // TODO: We could further optimize the output by using a GEP operation of
395     // type 'OutTy' in cases where the element type of the allocation allows.
396     if (OutBasePtr) {
397       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
398       OutOffset = Builder.CreateMul(OutOffset, OutStep);
399       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
400       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
401     }
402     if (InBasePtr) {
403       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
404       InOffset = Builder.CreateMul(InOffset, InStep);
405       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
406       InPtr = Builder.CreatePointerCast(InPtr, InTy);
407     }
408 
409     if (InPtr) {
410       RootArgs.push_back(InPtr);
411     }
412 
413     if (OutPtr) {
414       RootArgs.push_back(OutPtr);
415     }
416 
417     if (UsrData) {
418       RootArgs.push_back(UsrData);
419     }
420 
421     llvm::Value *X = IV;
422     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
423       RootArgs.push_back(X);
424     }
425 
426     if (Y) {
427       RootArgs.push_back(Y);
428     }
429 
430     Builder.CreateCall(F, RootArgs);
431 
432     return true;
433   }
434 
435   /* Expand a pass-by-value kernel.
436    */
ExpandKernel(llvm::Function * F,uint32_t Signature)437   bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
438     bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
439     ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
440 
441     // TODO: Refactor this to share functionality with ExpandFunction.
442     llvm::DataLayout DL(M);
443 
444     llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
445 
446     // Create and name the actual arguments to this expanded function.
447     llvm::SmallVector<llvm::Argument*, 8> ArgVec;
448     for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
449                                       E = ExpandedFunc->arg_end();
450          B != E;
451          ++B) {
452       ArgVec.push_back(B);
453     }
454 
455     if (ArgVec.size() != 5) {
456       ALOGE("Incorrect number of arguments to function: %zu",
457             ArgVec.size());
458       return false;
459     }
460     llvm::Value *Arg_p = ArgVec[0];
461     llvm::Value *Arg_x1 = ArgVec[1];
462     llvm::Value *Arg_x2 = ArgVec[2];
463     llvm::Value *Arg_instep = ArgVec[3];
464     llvm::Value *Arg_outstep = ArgVec[4];
465 
466     llvm::Value *InStep = NULL;
467     llvm::Value *OutStep = NULL;
468 
469     // Construct the actual function body.
470     llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
471 
472     // Create TBAA meta-data.
473     llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
474 
475     llvm::MDBuilder MDHelper(*C);
476     TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
477     TBAAAllocation = MDHelper.createTBAANode("allocation", TBAARenderScript);
478     TBAAPointer = MDHelper.createTBAANode("pointer", TBAARenderScript);
479 
480     // Collect and construct the arguments for the kernel().
481     // Note that we load any loop-invariant arguments before entering the Loop.
482     llvm::Function::arg_iterator Args = F->arg_begin();
483 
484     llvm::Type *OutTy = NULL;
485     bool PassOutByReference = false;
486     llvm::LoadInst *OutBasePtr = NULL;
487     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
488       llvm::Type *OutBaseTy = F->getReturnType();
489       if (OutBaseTy->isVoidTy()) {
490         PassOutByReference = true;
491         OutTy = Args->getType();
492         Args++;
493       } else {
494         OutTy = OutBaseTy->getPointerTo();
495         // We don't increment Args, since we are using the actual return type.
496       }
497       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
498       OutStep->setName("outstep");
499       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
500       OutBasePtr->setMetadata("tbaa", TBAAPointer);
501     }
502 
503     llvm::Type *InBaseTy = NULL;
504     llvm::Type *InTy = NULL;
505     llvm::LoadInst *InBasePtr = NULL;
506     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
507       InBaseTy = Args->getType();
508       InTy =InBaseTy->getPointerTo();
509       InStep = getStepValue(&DL, InTy, Arg_instep);
510       InStep->setName("instep");
511       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
512       InBasePtr->setMetadata("tbaa", TBAAPointer);
513       Args++;
514     }
515 
516     // No usrData parameter on kernels.
517     bccAssert(
518         !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
519 
520     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
521       Args++;
522     }
523 
524     llvm::Value *Y = NULL;
525     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
526       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
527       Args++;
528     }
529 
530     bccAssert(Args == F->arg_end());
531 
532     llvm::PHINode *IV;
533     createLoop(Builder, Arg_x1, Arg_x2, &IV);
534 
535     // Populate the actual call to kernel().
536     llvm::SmallVector<llvm::Value*, 8> RootArgs;
537 
538     llvm::Value *InPtr = NULL;
539     llvm::Value *OutPtr = NULL;
540 
541     // Calculate the current input and output pointers
542     //
543     // We always calculate the input/output pointers with a GEP operating on i8
544     // values and only cast at the very end to OutTy. This is because the step
545     // between two values is given in bytes.
546     //
547     // TODO: We could further optimize the output by using a GEP operation of
548     // type 'OutTy' in cases where the element type of the allocation allows.
549     if (OutBasePtr) {
550       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
551       OutOffset = Builder.CreateMul(OutOffset, OutStep);
552       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
553       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
554     }
555     if (InBasePtr) {
556       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
557       InOffset = Builder.CreateMul(InOffset, InStep);
558       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
559       InPtr = Builder.CreatePointerCast(InPtr, InTy);
560     }
561 
562     if (PassOutByReference) {
563       RootArgs.push_back(OutPtr);
564     }
565 
566     if (InPtr) {
567       llvm::LoadInst *In = Builder.CreateLoad(InPtr, "In");
568       In->setMetadata("tbaa", TBAAAllocation);
569       RootArgs.push_back(In);
570     }
571 
572     llvm::Value *X = IV;
573     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
574       RootArgs.push_back(X);
575     }
576 
577     if (Y) {
578       RootArgs.push_back(Y);
579     }
580 
581     llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
582 
583     if (OutPtr && !PassOutByReference) {
584       llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
585       Store->setMetadata("tbaa", TBAAAllocation);
586     }
587 
588     return true;
589   }
590 
591   /// @brief Checks if pointers to allocation internals are exposed
592   ///
593   /// This function verifies if through the parameters passed to the kernel
594   /// or through calls to the runtime library the script gains access to
595   /// pointers pointing to data within a RenderScript Allocation.
596   /// If we know we control all loads from and stores to data within
597   /// RenderScript allocations and if we know the run-time internal accesses
598   /// are all annotated with RenderScript TBAA metadata, only then we
599   /// can safely use TBAA to distinguish between generic and from-allocation
600   /// pointers.
allocPointersExposed(llvm::Module & M)601   bool allocPointersExposed(llvm::Module &M) {
602     // Old style kernel function can expose pointers to elements within
603     // allocations.
604     // TODO: Extend analysis to allow simple cases of old-style kernels.
605     for (RSInfo::ExportForeachFuncListTy::const_iterator
606              func_iter = mFuncs.begin(), func_end = mFuncs.end();
607          func_iter != func_end; func_iter++) {
608       const char *Name = func_iter->first;
609       uint32_t Signature = func_iter->second;
610       if (M.getFunction(Name) &&
611           !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
612         return true;
613       }
614     }
615 
616     // Check for library functions that expose a pointer to an Allocation or
617     // that are not yet annotated with RenderScript-specific tbaa information.
618     static std::vector<std::string> Funcs;
619 
620     // rsGetElementAt(...)
621     Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
622     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
623     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
624     // rsSetElementAt()
625     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
626     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
627     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
628     // rsGetElementAtYuv_uchar_Y()
629     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
630     // rsGetElementAtYuv_uchar_U()
631     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
632     // rsGetElementAtYuv_uchar_V()
633     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
634 
635     for (std::vector<std::string>::iterator FI = Funcs.begin(),
636                                             FE = Funcs.end();
637          FI != FE; ++FI) {
638       llvm::Function *F = M.getFunction(*FI);
639 
640       if (!F) {
641         ALOGE("Missing run-time function '%s'", FI->c_str());
642         return true;
643       }
644 
645       if (F->getNumUses() > 0) {
646         return true;
647       }
648     }
649 
650     return false;
651   }
652 
653   /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
654   ///
655   /// The TBAA metadata used to annotate loads/stores from RenderScript
656   /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
657   /// root node. LLVM does assume may-alias for all nodes in unrelated alias
658   /// analysis trees. This function makes the RenderScript TBAA a subtree of the
659   /// normal C/C++ TBAA tree aside of normal C/C++ types. With the connected trees
660   /// every access to an Allocation is resolved to must-alias if compared to
661   /// a normal C/C++ access.
connectRenderScriptTBAAMetadata(llvm::Module & M)662   void connectRenderScriptTBAAMetadata(llvm::Module &M) {
663     llvm::MDBuilder MDHelper(*C);
664     llvm::MDNode *TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
665 
666     llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA");
667     llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript", TBAARoot);
668 
669     TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
670   }
671 
runOnModule(llvm::Module & M)672   virtual bool runOnModule(llvm::Module &M) {
673     bool Changed = false;
674     this->M = &M;
675     C = &M.getContext();
676 
677     bool AllocsExposed = allocPointersExposed(M);
678 
679     for (RSInfo::ExportForeachFuncListTy::const_iterator
680              func_iter = mFuncs.begin(), func_end = mFuncs.end();
681          func_iter != func_end; func_iter++) {
682       const char *name = func_iter->first;
683       uint32_t signature = func_iter->second;
684       llvm::Function *kernel = M.getFunction(name);
685       if (kernel) {
686         if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
687           Changed |= ExpandKernel(kernel, signature);
688           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
689         } else if (kernel->getReturnType()->isVoidTy()) {
690           Changed |= ExpandFunction(kernel, signature);
691           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
692         } else {
693           // There are some graphics root functions that are not
694           // expanded, but that will be called directly. For those
695           // functions, we can not set the linkage to internal.
696         }
697       }
698     }
699 
700     if (!AllocsExposed) {
701       connectRenderScriptTBAAMetadata(M);
702     }
703 
704     return Changed;
705   }
706 
getPassName() const707   virtual const char *getPassName() const {
708     return "ForEach-able Function Expansion";
709   }
710 
711 }; // end RSForEachExpandPass
712 
713 } // end anonymous namespace
714 
715 char RSForEachExpandPass::ID = 0;
716 
717 namespace bcc {
718 
719 llvm::ModulePass *
createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy & pForeachFuncs,bool pEnableStepOpt)720 createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
721                           bool pEnableStepOpt){
722   return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
723 }
724 
725 } // end namespace bcc
726