• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011-2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuExecutable.h"
20 
21 #ifdef RS_COMPATIBILITY_LIB
22     #include <stdio.h>
23     #include <sys/stat.h>
24     #include <unistd.h>
25 #else
26     #include "rsCppUtils.h"
27 
28     #include <bcc/Config.h>
29     #include <bcinfo/MetadataExtractor.h>
30 
31     #include <zlib.h>
32     #include <sys/file.h>
33     #include <sys/types.h>
34     #include <unistd.h>
35 
36     #include <string>
37     #include <vector>
38 #endif
39 
40 #include <set>
41 #include <string>
42 #include <dlfcn.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <iostream>
46 #include <sstream>
47 
48 namespace {
49 
// Compile-time switch: when true, storeRSInfoFromSO() dumps the loaded
// script's global-variable information.
constexpr bool kDebugGlobalVariables = false;
51 
allocationLODIsNull(const android::renderscript::Allocation * alloc)52 static bool allocationLODIsNull(const android::renderscript::Allocation *alloc) {
53   // Even if alloc != nullptr, mallocPtr could be null if
54   // IO_OUTPUT/IO_INPUT with no bound surface.
55   return alloc && alloc->mHal.drvState.lod[0].mallocPtr == nullptr;
56 }
57 
58 #ifndef RS_COMPATIBILITY_LIB
59 
setCompileArguments(std::vector<const char * > * args,const std::string & bcFileName,const char * cacheDir,const char * resName,const char * core_lib,bool useRSDebugContext,const char * bccPluginName,bool emitGlobalInfo,int optLevel,bool emitGlobalInfoSkipConstant)60 static void setCompileArguments(std::vector<const char*>* args,
61                                 const std::string& bcFileName,
62                                 const char* cacheDir, const char* resName,
63                                 const char* core_lib, bool useRSDebugContext,
64                                 const char* bccPluginName, bool emitGlobalInfo,
65                                 int optLevel, bool emitGlobalInfoSkipConstant) {
66     rsAssert(cacheDir && resName && core_lib);
67     args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
68     args->push_back("-unroll-runtime");
69     args->push_back("-scalarize-load-store");
70     if (emitGlobalInfo) {
71         args->push_back("-rs-global-info");
72         if (emitGlobalInfoSkipConstant) {
73             args->push_back("-rs-global-info-skip-constant");
74         }
75     }
76     args->push_back("-o");
77     args->push_back(resName);
78     args->push_back("-output_path");
79     args->push_back(cacheDir);
80     args->push_back("-bclib");
81     args->push_back(core_lib);
82     args->push_back("-mtriple");
83     args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
84     args->push_back("-O");
85 
86     switch (optLevel) {
87     case 0:
88         args->push_back("0");
89         break;
90     case 3:
91         args->push_back("3");
92         break;
93     default:
94         ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
95         args->push_back("3");
96         break;
97     }
98 
99     // Enable workaround for A53 codegen by default.
100 #if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
101     args->push_back("-aarch64-fix-cortex-a53-835769");
102 #endif
103 
104     // Execute the bcc compiler.
105     if (useRSDebugContext) {
106         args->push_back("-rs-debug-ctx");
107     } else {
108         // Only load additional libraries for compiles that don't use
109         // the debug context.
110         if (bccPluginName && strlen(bccPluginName) > 0) {
111 #ifdef __ANDROID__
112             // For Android, -plugin option must be used in order to load the
113             // vendor plugin from the sphal namespace.
114             args->push_back("-plugin");
115 #else
116             args->push_back("-load");
117 #endif
118             args->push_back(bccPluginName);
119         }
120     }
121 
122     args->push_back("-fPIC");
123     args->push_back("-embedRSInfo");
124 
125     args->push_back(bcFileName.c_str());
126     args->push_back(nullptr);
127 }
128 
compileBitcode(const std::string & bcFileName,const char * bitcode,size_t bitcodeSize,std::vector<const char * > & compileArguments)129 static bool compileBitcode(const std::string &bcFileName,
130                            const char *bitcode,
131                            size_t bitcodeSize,
132                            std::vector<const char *> &compileArguments) {
133     rsAssert(bitcode && bitcodeSize);
134 
135     FILE *bcfile = fopen(bcFileName.c_str(), "w");
136     if (!bcfile) {
137         ALOGE("Could not write to %s", bcFileName.c_str());
138         return false;
139     }
140     size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
141     fclose(bcfile);
142     if (nwritten != bitcodeSize) {
143         ALOGE("Could not write %zu bytes to %s", bitcodeSize,
144               bcFileName.c_str());
145         return false;
146     }
147 
148     return android::renderscript::rsuExecuteCommand(
149                    android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
150                    compileArguments.size()-1, compileArguments.data());
151 }
152 
153 // The checksum is unnecessary under a few conditions, since the primary
154 // use-case for it is debugging. If we are loading something from the
155 // system partition (read-only), we know that it was precompiled as part of
156 // application ahead of time (and thus the checksum is completely
157 // unnecessary). The checksum is also unnecessary on release (non-debug)
158 // builds, as the only way to get a shared object is to have compiled the
159 // script once already. On a release build, there is no way to adjust the
160 // other libraries/dependencies, and so the only reason to recompile would
161 // be for a source APK change or an OTA. In either case, the APK would be
162 // reinstalled, which would already clear the code_cache/ directory.
isChecksumNeeded(const char * cacheDir)163 bool isChecksumNeeded(const char *cacheDir) {
164     if ((::strcmp(SYSLIBPATH, cacheDir) == 0) ||
165         (::strcmp(SYSLIBPATH_VNDK, cacheDir) == 0) ||
166         (::strcmp(SYSLIBPATH_VENDOR, cacheDir) == 0))
167         return false;
168     char buf[PROP_VALUE_MAX];
169     android::renderscript::property_get("ro.debuggable", buf, "");
170     return (buf[0] == '1');
171 }
172 
addFileToChecksum(const char * fileName,uint32_t & checksum)173 bool addFileToChecksum(const char *fileName, uint32_t &checksum) {
174     int FD = open(fileName, O_RDONLY);
175     if (FD == -1) {
176         ALOGE("Cannot open file \'%s\' to compute checksum", fileName);
177         return false;
178     }
179 
180     char buf[256];
181     while (true) {
182         ssize_t nread = read(FD, buf, sizeof(buf));
183         if (nread < 0) { // bail out on failed read
184             ALOGE("Error while computing checksum for file \'%s\'", fileName);
185             return false;
186         }
187 
188         checksum = adler32(checksum, (const unsigned char *) buf, nread);
189         if (static_cast<size_t>(nread) < sizeof(buf)) // EOF
190             break;
191     }
192 
193     if (close(FD) != 0) {
194         ALOGE("Cannot close file \'%s\' after computing checksum", fileName);
195         return false;
196     }
197     return true;
198 }
199 
200 #endif  // !defined(RS_COMPATIBILITY_LIB)
201 }  // namespace
202 
203 namespace android {
204 namespace renderscript {
205 
206 #ifndef RS_COMPATIBILITY_LIB
207 
constructBuildChecksum(uint8_t const * bitcode,size_t bitcodeSize,const char * commandLine,const char ** bccFiles,size_t numFiles)208 uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize,
209                                 const char *commandLine,
210                                 const char** bccFiles, size_t numFiles) {
211     uint32_t checksum = adler32(0L, Z_NULL, 0);
212 
213     // include checksum of bitcode
214     if (bitcode != nullptr && bitcodeSize > 0) {
215         checksum = adler32(checksum, bitcode, bitcodeSize);
216     }
217 
218     // include checksum of command line arguments
219     checksum = adler32(checksum, (const unsigned char *) commandLine,
220                        strlen(commandLine));
221 
222     // include checksum of bccFiles
223     for (size_t i = 0; i < numFiles; i++) {
224         const char* bccFile = bccFiles[i];
225         if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) {
226             // return empty checksum instead of something partial/corrupt
227             return 0;
228         }
229     }
230 
231     return checksum;
232 }
233 
234 #endif  // !RS_COMPATIBILITY_LIB
235 
RsdCpuScriptImpl(RsdCpuReferenceImpl * ctx,const Script * s)236 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
237     mCtx = ctx;
238     mScript = s;
239 
240     mScriptSO = nullptr;
241 
242     mRoot = nullptr;
243     mRootExpand = nullptr;
244     mInit = nullptr;
245     mFreeChildren = nullptr;
246     mScriptExec = nullptr;
247 
248     mBoundAllocs = nullptr;
249     mIntrinsicData = nullptr;
250     mIsThreadable = true;
251 
252     mBuildChecksum = 0;
253     mChecksumNeeded = false;
254 }
255 
storeRSInfoFromSO()256 bool RsdCpuScriptImpl::storeRSInfoFromSO() {
257     // The shared object may have an invalid build checksum.
258     // Validate and fail early.
259     mScriptExec = ScriptExecutable::createFromSharedObject(
260             mScriptSO, mChecksumNeeded ? mBuildChecksum : 0);
261 
262     if (mScriptExec == nullptr) {
263         return false;
264     }
265 
266     mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
267     if (mRoot) {
268         //ALOGE("Found root(): %p", mRoot);
269     }
270     mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
271     if (mRootExpand) {
272         //ALOGE("Found root.expand(): %p", mRootExpand);
273     }
274     mInit = (InitOrDtorFunc_t) dlsym(mScriptSO, "init");
275     if (mInit) {
276         //ALOGE("Found init(): %p", mInit);
277     }
278     mFreeChildren = (InitOrDtorFunc_t) dlsym(mScriptSO, ".rs.dtor");
279     if (mFreeChildren) {
280         //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
281     }
282 
283     size_t varCount = mScriptExec->getExportedVariableCount();
284     if (varCount > 0) {
285         mBoundAllocs = new Allocation *[varCount];
286         memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
287     }
288 
289     mIsThreadable = mScriptExec->getThreadable();
290     //ALOGE("Script isThreadable? %d", mIsThreadable);
291 
292     if (kDebugGlobalVariables) {
293         mScriptExec->dumpGlobalInfo();
294     }
295 
296     return true;
297 }
298 
init(char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags,char const * bccPluginName)299 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
300                             uint8_t const *bitcode, size_t bitcodeSize,
301                             uint32_t flags, char const *bccPluginName) {
302     //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
303     // bitcode, bitcodeSize, flags, lookupFunc);
304     //ALOGE("rsdScriptInit %p %p", rsc, script);
305 
306     mCtx->lockMutex();
307 #ifndef RS_COMPATIBILITY_LIB
308     bool useRSDebugContext = false;
309 
310     bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
311     if (!bitcodeMetadata.extract()) {
312         ALOGE("Could not extract metadata from bitcode");
313         mCtx->unlockMutex();
314         return false;
315     }
316 
317     const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
318 
319     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
320         useRSDebugContext = true;
321     }
322 
323     int optLevel = mCtx->getContext()->getOptLevel();
324 
325     std::string bcFileName(cacheDir);
326     bcFileName.append("/");
327     bcFileName.append(resName);
328     bcFileName.append(".bc");
329 
330     std::vector<const char*> compileArguments;
331     bool emitGlobalInfo = mCtx->getEmbedGlobalInfo();
332     bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant();
333     setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
334                         useRSDebugContext, bccPluginName, emitGlobalInfo,
335                         optLevel, emitGlobalInfoSkipConstant);
336 
337     mChecksumNeeded = isChecksumNeeded(cacheDir);
338     if (mChecksumNeeded) {
339         std::vector<const char *> bccFiles = { BCC_EXE_PATH,
340                                                core_lib,
341                                              };
342 
343         // The last argument of compileArguments is a nullptr, so remove 1 from
344         // the size.
345         std::unique_ptr<const char> compileCommandLine(
346             rsuJoinStrings(compileArguments.size()-1, compileArguments.data()));
347 
348         mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize,
349                                                 compileCommandLine.get(),
350                                                 bccFiles.data(), bccFiles.size());
351 
352         if (mBuildChecksum == 0) {
353             // cannot compute checksum but verification is enabled
354             mCtx->unlockMutex();
355             return false;
356         }
357     }
358     else {
359         // add a dummy/constant as a checksum if verification is disabled
360         mBuildChecksum = 0xabadcafe;
361     }
362 
363     // Append build checksum to commandline
364     // Handle the terminal nullptr in compileArguments
365     compileArguments.pop_back();
366     compileArguments.push_back("-build-checksum");
367     std::stringstream ss;
368     ss << std::hex << mBuildChecksum;
369     std::string checksumStr(ss.str());
370     compileArguments.push_back(checksumStr.c_str());
371     compileArguments.push_back(nullptr);
372 
373     const bool reuse = !is_force_recompile() && !useRSDebugContext;
374     if (reuse) {
375         mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
376 
377         // Read RS info from the shared object to detect checksum mismatch
378         if (mScriptSO != nullptr && !storeRSInfoFromSO()) {
379             dlclose(mScriptSO);
380             mScriptSO = nullptr;
381         }
382     }
383 
384     // If reuse is desired and we can't, it's either not there or out of date.
385     // We compile the bit code and try loading again.
386     if (mScriptSO == nullptr) {
387         if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
388                             compileArguments))
389         {
390             ALOGE("bcc: FAILS to compile '%s'", resName);
391             mCtx->unlockMutex();
392             return false;
393         }
394 
395         std::string SOPath;
396 
397         if (!SharedLibraryUtils::createSharedLibrary(
398                 mCtx->getContext()->getDriverName(), cacheDir, resName, reuse,
399                 &SOPath)) {
400             ALOGE("Linker: Failed to link object file '%s'", resName);
401             mCtx->unlockMutex();
402             return false;
403         }
404 
405         if (reuse) {
406             mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
407         } else {
408             mScriptSO = SharedLibraryUtils::loadAndDeleteSharedLibrary(SOPath.c_str());
409         }
410         if (mScriptSO == nullptr) {
411             ALOGE("Unable to load '%s'", resName);
412             mCtx->unlockMutex();
413             return false;
414         }
415 
416         // Read RS symbol information from the .so.
417         if (!storeRSInfoFromSO()) {
418             goto error;
419         }
420     }
421 
422     mBitcodeFilePath.assign(bcFileName.c_str());
423 
424 #else  // RS_COMPATIBILITY_LIB is defined
425     const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
426     mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
427 
428     if (!mScriptSO) {
429         goto error;
430     }
431 
432     if (!storeRSInfoFromSO()) {
433         goto error;
434     }
435 #endif
436     mCtx->unlockMutex();
437     return true;
438 
439 error:
440 
441     mCtx->unlockMutex();
442     if (mScriptSO) {
443         dlclose(mScriptSO);
444         mScriptSO = nullptr;
445     }
446     return false;
447 }
448 
449 #ifndef RS_COMPATIBILITY_LIB
450 
findCoreLib(const bcinfo::MetadataExtractor & ME,const char * bitcode,size_t bitcodeSize)451 const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
452                                           size_t bitcodeSize) {
453     const char* defaultLib = SYSLIBPATH_BC"/libclcore.bc";
454 
455     // If we're debugging, use the debug library.
456     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
457         if (ME.hasDebugInfo()) {
458             return SYSLIBPATH_BC"/libclcore_debug_g.bc";
459         }
460         return SYSLIBPATH_BC"/libclcore_debug.bc";
461     }
462 
463     if (ME.hasDebugInfo()) {
464         return SYSLIBPATH_BC"/libclcore_g.bc";
465     }
466 
467     // If a callback has been registered to specify a library, use that.
468     RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
469     if (selectRTCallback != nullptr) {
470         return selectRTCallback((const char*)bitcode, bitcodeSize);
471     }
472 
473     // Check for a platform specific library
474 #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
475     enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
476     if (prec == bcinfo::RS_FP_Relaxed) {
477         // NEON-capable ARMv7a devices can use an accelerated math library
478         // for all reduced precision scripts.
479         // ARMv8 does not use NEON, as ASIMD can be used with all precision
480         // levels.
481         return SYSLIBPATH_BC"/libclcore_neon.bc";
482     } else {
483         return defaultLib;
484     }
485 #elif defined(__i386__) || defined(__x86_64__)
486     // x86 devices will use an optimized library.
487     return SYSLIBPATH_BC"/libclcore_x86.bc";
488 #else
489     return defaultLib;
490 #endif
491 }
492 
493 #endif
494 
populateScript(Script * script)495 void RsdCpuScriptImpl::populateScript(Script *script) {
496     // Copy info over to runtime
497     script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
498     script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount();
499     script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount();
500     script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
501     script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
502     script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
503     script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
504 
505     // Bug, need to stash in metadata
506     if (mRootExpand) {
507         script->mHal.info.root = mRootExpand;
508     } else {
509         script->mHal.info.root = mRoot;
510     }
511 }
512 
513 // Set up the launch dimensions, and write the values of the launch
514 // dimensions into the mtls start/end fields.
515 //
516 // Inputs:
517 //    baseDim - base shape of the input
518 //         sc - used to constrain the launch dimensions
519 //
520 // Returns:
521 //   True on success, false on failure to set up
setUpMtlsDimensions(MTLaunchStructCommon * mtls,const RsLaunchDimensions & baseDim,const RsScriptCall * sc)522 bool RsdCpuScriptImpl::setUpMtlsDimensions(MTLaunchStructCommon *mtls,
523                                            const RsLaunchDimensions &baseDim,
524                                            const RsScriptCall *sc) {
525     rsAssert(mtls);
526 
527 #define SET_UP_DIMENSION(DIM_FIELD, SC_FIELD) do {            \
528     if (!sc || (sc->SC_FIELD##End == 0)) {                    \
529         mtls->end.DIM_FIELD = baseDim.DIM_FIELD;              \
530     } else {                                                  \
531         mtls->start.DIM_FIELD =                               \
532             rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##Start);    \
533         mtls->end.DIM_FIELD =                                 \
534             rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##End);      \
535         if (mtls->start.DIM_FIELD >= mtls->end.DIM_FIELD) {   \
536             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, \
537                 "Failed to launch kernel; Invalid "           \
538                 #SC_FIELD "Start or " #SC_FIELD "End.");      \
539             return false;                                     \
540         }                                                     \
541     }} while(0)
542 
543     SET_UP_DIMENSION(x, x);
544     SET_UP_DIMENSION(y, y);
545     SET_UP_DIMENSION(z, z);
546     // Checks and setup of fields other than x, y, z are ignored, since those
547     // fields are not used in the runtime and are not visible in the Java API.
548 #undef SET_UP_DIMENSION
549 
550     return true;
551 }
552 
553 // Preliminary work to prepare a general reduce-style kernel for launch.
reduceMtlsSetup(const Allocation ** ains,uint32_t inLen,const Allocation * aout,const RsScriptCall * sc,MTLaunchStructReduce * mtls)554 bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation ** ains,
555                                        uint32_t inLen,
556                                        const Allocation * aout,
557                                        const RsScriptCall *sc,
558                                        MTLaunchStructReduce *mtls) {
559     rsAssert(ains && (inLen >= 1) && aout);
560     memset(mtls, 0, sizeof(MTLaunchStructReduce));
561     mtls->dimPtr = &mtls->redp.dim;
562 
563     for (int index = inLen; --index >= 0;) {
564         if (allocationLODIsNull(ains[index])) {
565             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
566                                          "reduce called with null in allocations");
567             return false;
568         }
569     }
570 
571     if (allocationLODIsNull(aout)) {
572         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
573                                      "reduce called with null out allocation");
574         return false;
575     }
576 
577     const Allocation *ain0   = ains[0];
578     const Type       *inType = ain0->getType();
579 
580     mtls->redp.dim.x = inType->getDimX();
581     mtls->redp.dim.y = inType->getDimY();
582     mtls->redp.dim.z = inType->getDimZ();
583 
584     for (int Index = inLen; --Index >= 1;) {
585         if (!ain0->hasSameDims(ains[Index])) {
586             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
587                                          "Failed to launch reduction kernel;"
588                                          "dimensions of input allocations do not match.");
589             return false;
590         }
591     }
592 
593     if (!setUpMtlsDimensions(mtls, mtls->redp.dim, sc)) {
594         return false;
595     }
596 
597     // The X & Y walkers always want 0-1 min even if dim is not present
598     mtls->end.x = rsMax((uint32_t)1, mtls->end.x);
599     mtls->end.y = rsMax((uint32_t)1, mtls->end.y);
600 
601     mtls->rs = mCtx;
602 
603     mtls->mSliceNum    = 0;
604     mtls->mSliceSize   = 1;
605     mtls->isThreadable = mIsThreadable;
606 
607     // Set up output,
608     mtls->redp.outLen = 1;
609     mtls->redp.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
610     mtls->redp.outStride[0] = aout->getType()->getElementSizeBytes();
611 
612     // Set up input.
613     memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
614     mtls->redp.inLen = inLen;
615     for (int index = inLen; --index >= 0;) {
616         mtls->redp.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
617         mtls->redp.inStride[index] = ains[index]->getType()->getElementSizeBytes();
618     }
619 
620     // All validation passed, ok to launch threads
621     return true;
622 }
623 
624 // Preliminary work to prepare a forEach-style kernel for launch.
forEachMtlsSetup(const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc,MTLaunchStructForEach * mtls)625 bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
626                                         uint32_t inLen,
627                                         Allocation * aout,
628                                         const void * usr, uint32_t usrLen,
629                                         const RsScriptCall *sc,
630                                         MTLaunchStructForEach *mtls) {
631     if (ains == nullptr && inLen != 0) {
632         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
633           "rsForEach called with none-zero inLen with null in allocations");
634         return false;
635     }
636 
637     memset(mtls, 0, sizeof(MTLaunchStructForEach));
638     mtls->dimPtr = &mtls->fep.dim;
639 
640     for (int index = inLen; --index >= 0;) {
641         if (allocationLODIsNull(ains[index])) {
642             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
643                                          "rsForEach called with null in allocations");
644             return false;
645         }
646     }
647 
648     if (allocationLODIsNull(aout)) {
649         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
650                                      "rsForEach called with null out allocations");
651         return false;
652     }
653 
654     // The only situation where ains[j] is null is when inLen==1 and j==0;
655     // and that can only happen for an old-style kernel in API level 11~13,
656     // where the input allocation cannot be skipped if the output allocation is specified.
657     if (inLen != 0)
658         rsAssert((inLen == 1) || (ains[0] != nullptr));
659 
660     if (inLen > 0 && ains[0]) {
661         const Allocation *ain0   = ains[0];
662         const Type       *inType = ain0->getType();
663 
664         mtls->fep.dim.x = inType->getDimX();
665         mtls->fep.dim.y = inType->getDimY();
666         mtls->fep.dim.z = inType->getDimZ();
667 
668         for (int Index = inLen; --Index >= 1;) {
669             if (!ain0->hasSameDims(ains[Index])) {
670                 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
671                   "Failed to launch kernel; dimensions of input "
672                   "allocations do not match.");
673                 return false;
674             }
675         }
676     } else if (aout != nullptr) {
677         const Type *outType = aout->getType();
678 
679         mtls->fep.dim.x = outType->getDimX();
680         mtls->fep.dim.y = outType->getDimY();
681         mtls->fep.dim.z = outType->getDimZ();
682 
683     } else if (sc != nullptr) {
684         mtls->fep.dim.x = sc->xEnd;
685         mtls->fep.dim.y = sc->yEnd;
686         mtls->fep.dim.z = 0;
687     } else {
688         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
689                                      "rsForEach called with null allocations");
690         return false;
691     }
692 
693     if (inLen > 0 && aout != nullptr) {
694         if (ains[0] && !ains[0]->hasSameDims(aout)) {
695             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
696               "Failed to launch kernel; dimensions of input and output allocations do not match.");
697 
698             return false;
699         }
700     }
701 
702     if (!setUpMtlsDimensions(mtls, mtls->fep.dim, sc)) {
703         return false;
704     }
705 
706     // The X & Y walkers always want 0-1 min even if dim is not present
707     mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
708     mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
709     mtls->rs       = mCtx;
710     if (ains) {
711         memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
712     }
713     mtls->aout[0]    = aout;
714     mtls->fep.usr    = usr;
715     mtls->fep.usrLen = usrLen;
716     mtls->mSliceSize = 1;
717     mtls->mSliceNum  = 0;
718 
719     mtls->isThreadable  = mIsThreadable;
720 
721     if (inLen > 0) {
722         mtls->fep.inLen = inLen;
723         for (int index = inLen; --index >= 0;) {
724             if (ains[index] == nullptr) {
725                 // In old style kernels, the first and only input allocation could be null.
726                 // Not allowed in newer styles.
727                 rsAssert(inLen == 1 && index == 0);
728                 continue;
729             }
730             mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
731             mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
732         }
733     }
734 
735     if (aout != nullptr) {
736         mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
737         mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
738     }
739 
740     // All validation passed, ok to launch threads
741     return true;
742 }
743 
744 
invokeForEach(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)745 void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
746                                      const Allocation ** ains,
747                                      uint32_t inLen,
748                                      Allocation * aout,
749                                      const void * usr,
750                                      uint32_t usrLen,
751                                      const RsScriptCall *sc) {
752 
753     MTLaunchStructForEach mtls;
754 
755     if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
756         forEachKernelSetup(slot, &mtls);
757 
758         RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
759         mCtx->launchForEach(ains, inLen, aout, sc, &mtls);
760         mCtx->setTLS(oldTLS);
761     }
762 }
763 
invokeReduce(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc)764 void RsdCpuScriptImpl::invokeReduce(uint32_t slot,
765                                     const Allocation ** ains, uint32_t inLen,
766                                     Allocation *aout,
767                                     const RsScriptCall *sc) {
768   MTLaunchStructReduce mtls;
769 
770   if (reduceMtlsSetup(ains, inLen, aout, sc, &mtls)) {
771     reduceKernelSetup(slot, &mtls);
772     RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this);
773     mCtx->launchReduce(ains, inLen, aout, &mtls);
774     mCtx->setTLS(oldTLS);
775   }
776 }
777 
forEachKernelSetup(uint32_t slot,MTLaunchStructForEach * mtls)778 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls) {
779     mtls->script = this;
780     mtls->fep.slot = slot;
781     mtls->kernel = mScriptExec->getForEachFunction(slot);
782     rsAssert(mtls->kernel != nullptr);
783 }
784 
reduceKernelSetup(uint32_t slot,MTLaunchStructReduce * mtls)785 void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) {
786     mtls->script = this;
787     mtls->redp.slot = slot;
788 
789     const ReduceDescription *desc = mScriptExec->getReduceDescription(slot);
790     mtls->accumFunc = desc->accumFunc;
791     mtls->initFunc  = desc->initFunc;   // might legally be nullptr
792     mtls->combFunc  = desc->combFunc;   // might legally be nullptr
793     mtls->outFunc   = desc->outFunc;    // might legally be nullptr
794     mtls->accumSize = desc->accumSize;
795 
796     rsAssert(mtls->accumFunc != nullptr);
797 }
798 
invokeRoot()799 int RsdCpuScriptImpl::invokeRoot() {
800     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
801     int ret = mRoot();
802     mCtx->setTLS(oldTLS);
803     return ret;
804 }
805 
invokeInit()806 void RsdCpuScriptImpl::invokeInit() {
807     if (mInit) {
808         mInit();
809     }
810 }
811 
invokeFreeChildren()812 void RsdCpuScriptImpl::invokeFreeChildren() {
813     if (mFreeChildren) {
814         mFreeChildren();
815     }
816 }
817 
invokeFunction(uint32_t slot,const void * params,size_t paramLength)818 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
819                                       size_t paramLength) {
820     //ALOGE("invoke %i %p %zu", slot, params, paramLength);
821     void * ap = nullptr;
822 
823 #if defined(__x86_64__)
824     // The invoked function could have input parameter of vector type for example float4 which
825     // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
826     // So try to align void* params before passing them into RS exported function.
827 
828     if ((uint8_t)(uint64_t)params & 0x0F) {
829         if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
830             memcpy(ap, params, paramLength);
831         } else {
832             ALOGE("x86_64: invokeFunction memalign error, still use params which"
833                   " is not 16 bytes aligned.");
834         }
835     }
836 #endif
837 
838     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
839     reinterpret_cast<void (*)(const void *, uint32_t)>(
840         mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
841 
842 #if defined(__x86_64__)
843     free(ap);
844 #endif
845 
846     mCtx->setTLS(oldTLS);
847 }
848 
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)849 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
850     //rsAssert(!script->mFieldIsObject[slot]);
851     //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
852 
853     //if (mIntrinsicID) {
854         //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
855         //return;
856     //}
857 
858     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
859     if (!destPtr) {
860         //ALOGV("Calling setVar on slot = %i which is null", slot);
861         return;
862     }
863 
864     memcpy(destPtr, data, dataLength);
865 }
866 
getGlobalVar(uint32_t slot,void * data,size_t dataLength)867 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
868     //rsAssert(!script->mFieldIsObject[slot]);
869     //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
870 
871     int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
872     if (!srcPtr) {
873         //ALOGV("Calling setVar on slot = %i which is null", slot);
874         return;
875     }
876     memcpy(data, srcPtr, dataLength);
877 }
878 
879 
setGlobalVarWithElemDims(uint32_t slot,const void * data,size_t dataLength,const Element * elem,const uint32_t * dims,size_t dimLength)880 void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
881                                                 const Element *elem,
882                                                 const uint32_t *dims, size_t dimLength) {
883     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
884     if (!destPtr) {
885         //ALOGV("Calling setVar on slot = %i which is null", slot);
886         return;
887     }
888 
889     // We want to look at dimension in terms of integer components,
890     // but dimLength is given in terms of bytes.
891     dimLength /= sizeof(int);
892 
893     // Only a single dimension is currently supported.
894     rsAssert(dimLength == 1);
895     if (dimLength == 1) {
896         // First do the increment loop.
897         size_t stride = elem->getSizeBytes();
898         const char *cVal = reinterpret_cast<const char *>(data);
899         for (uint32_t i = 0; i < dims[0]; i++) {
900             elem->incRefs(cVal);
901             cVal += stride;
902         }
903 
904         // Decrement loop comes after (to prevent race conditions).
905         char *oldVal = reinterpret_cast<char *>(destPtr);
906         for (uint32_t i = 0; i < dims[0]; i++) {
907             elem->decRefs(oldVal);
908             oldVal += stride;
909         }
910     }
911 
912     memcpy(destPtr, data, dataLength);
913 }
914 
setGlobalBind(uint32_t slot,Allocation * data)915 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
916 
917     //rsAssert(!script->mFieldIsObject[slot]);
918     //ALOGE("setGlobalBind %i %p", slot, data);
919 
920     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
921     if (!destPtr) {
922         //ALOGV("Calling setVar on slot = %i which is null", slot);
923         return;
924     }
925 
926     void *ptr = nullptr;
927     mBoundAllocs[slot] = data;
928     if (data) {
929         ptr = data->mHal.drvState.lod[0].mallocPtr;
930     }
931     memcpy(destPtr, &ptr, sizeof(void *));
932 }
933 
setGlobalObj(uint32_t slot,ObjectBase * data)934 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
935 
936     //rsAssert(script->mFieldIsObject[slot]);
937     //ALOGE("setGlobalObj %i %p", slot, data);
938 
939     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
940     if (!destPtr) {
941         //ALOGV("Calling setVar on slot = %i which is null", slot);
942         return;
943     }
944 
945     rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
946 }
947 
getFieldName(uint32_t slot) const948 const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
949     return mScriptExec->getFieldName(slot);
950 }
951 
~RsdCpuScriptImpl()952 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
953     delete mScriptExec;
954     delete[] mBoundAllocs;
955     if (mScriptSO) {
956         dlclose(mScriptSO);
957     }
958 }
959 
getAllocationForPointer(const void * ptr) const960 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
961     if (!ptr) {
962         return nullptr;
963     }
964 
965     for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
966         Allocation *a = mBoundAllocs[ct];
967         if (!a) continue;
968         if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
969             return a;
970         }
971     }
972     ALOGE("rsGetAllocation, failed to find %p", ptr);
973     return nullptr;
974 }
975 
getGlobalEntries() const976 int RsdCpuScriptImpl::getGlobalEntries() const {
977     return mScriptExec->getGlobalEntries();
978 }
979 
getGlobalName(int i) const980 const char * RsdCpuScriptImpl::getGlobalName(int i) const {
981     return mScriptExec->getGlobalName(i);
982 }
983 
getGlobalAddress(int i) const984 const void * RsdCpuScriptImpl::getGlobalAddress(int i) const {
985     return mScriptExec->getGlobalAddress(i);
986 }
987 
getGlobalSize(int i) const988 size_t RsdCpuScriptImpl::getGlobalSize(int i) const {
989     return mScriptExec->getGlobalSize(i);
990 }
991 
getGlobalProperties(int i) const992 uint32_t RsdCpuScriptImpl::getGlobalProperties(int i) const {
993     return mScriptExec->getGlobalProperties(i);
994 }
995 
preLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)996 void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
997                                  uint32_t inLen, Allocation * aout,
998                                  const void * usr, uint32_t usrLen,
999                                  const RsScriptCall *sc) {}
1000 
postLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)1001 void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1002                                   uint32_t inLen, Allocation * aout,
1003                                   const void * usr, uint32_t usrLen,
1004                                   const RsScriptCall *sc) {}
1005 
1006 
1007 } // namespace renderscript
1008 } // namespace android
1009