1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuScriptGroup.h"
20
21 #include <malloc.h>
22 #include "rsContext.h"
23
24 #include <sys/types.h>
25 #include <sys/resource.h>
26 #include <sched.h>
27 #include <sys/syscall.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <fcntl.h>
34
35 #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
36 #include <cutils/properties.h>
37 #include "utils/StopWatch.h"
38 #endif
39
40 #ifdef RS_SERVER
41 // Android exposes gettid(), standard Linux does not
gettid()42 static pid_t gettid() {
43 return syscall(SYS_gettid);
44 }
45 #endif
46
47 using namespace android;
48 using namespace android::renderscript;
49
// Function-pointer type used by this file to invoke a kernel over one run of
// cells. The launch code casts MTLaunchStruct::kernel to this type and calls
// it with the per-call parameter block, the start and end of the x range
// (x1, x2), and the input and output element strides (instep, outstep).
typedef void (*outer_foreach_t)(
        const android::renderscript::RsForEachStubParamStruct *,
        uint32_t x1, uint32_t x2,
        uint32_t instep, uint32_t outstep);
54
55
// pthread TLS key under which each thread stores a pointer to a
// ScriptTLSStruct. Created in RsdCpuReferenceImpl::init() when the count
// below is zero and deleted in the destructor when it drops back to zero.
static pthread_key_t gThreadTLSKey = 0;
// Number of RsdCpuReferenceImpl instances currently holding gThreadTLSKey;
// incremented in init() and decremented in ~RsdCpuReferenceImpl().
static uint32_t gThreadTLSKeyCount = 0;
// Mutex taken around the key creation/deletion and count updates above; also
// exposed through RsdCpuReferenceImpl::lockMutex()/unlockMutex().
static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;

// Written by GetCpuInfo() from the feature flags found in /proc/cpuinfo;
// stays false when none of the architecture macros checked there are defined.
bool android::renderscript::gArchUseSIMD = false;
61
~RsdCpuReference()62 RsdCpuReference::~RsdCpuReference() {
63 }
64
create(Context * rsc,uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn,bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,RSSelectRTCallback pSelectRTCallback,const char * pBccPluginName)65 RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major,
66 uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
67 #ifndef RS_COMPATIBILITY_LIB
68 , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,
69 RSSelectRTCallback pSelectRTCallback,
70 const char *pBccPluginName
71 #endif
72 ) {
73
74 RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc);
75 if (!cpu) {
76 return NULL;
77 }
78 if (!cpu->init(version_major, version_minor, lfn, slfn)) {
79 delete cpu;
80 return NULL;
81 }
82
83 #ifndef RS_COMPATIBILITY_LIB
84 cpu->setLinkRuntimeCallback(pLinkRuntimeCallback);
85 cpu->setSelectRTCallback(pSelectRTCallback);
86 if (pBccPluginName) {
87 cpu->setBccPluginName(pBccPluginName);
88 }
89 #endif
90
91 return cpu;
92 }
93
94
getTlsContext()95 Context * RsdCpuReference::getTlsContext() {
96 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
97 return tls->mContext;
98 }
99
getTlsScript()100 const Script * RsdCpuReference::getTlsScript() {
101 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
102 return tls->mScript;
103 }
104
getThreadTLSKey()105 pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; }
106
107 ////////////////////////////////////////////////////////////
108 ///
109
RsdCpuReferenceImpl(Context * rsc)110 RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) {
111 mRSC = rsc;
112
113 version_major = 0;
114 version_minor = 0;
115 mInForEach = false;
116 memset(&mWorkers, 0, sizeof(mWorkers));
117 memset(&mTlsStruct, 0, sizeof(mTlsStruct));
118 mExit = false;
119 #ifndef RS_COMPATIBILITY_LIB
120 mLinkRuntimeCallback = NULL;
121 mSelectRTCallback = NULL;
122 mSetupCompilerCallback = NULL;
123 #endif
124 }
125
126
// Entry point of every helper (worker) thread started by init().
//
// vrsc is the owning RsdCpuReferenceImpl. The thread claims a unique worker
// index, sets up its launch signal and TLS value, and then loops until mExit
// is set: wait for its launch signal, run the currently published launch
// callback (if any), decrement mRunningCount and raise the completion signal.
// Always returns NULL.
void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
    RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc;

    // Atomically claim the next worker slot; idx indexes the per-worker arrays.
    uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1);

    //ALOGV("RS helperThread starting %p idx=%i", dc, idx);

    dc->mWorkers.mLaunchSignals[idx].init();
    // Recorded so setPriority() can address this thread by its kernel tid.
    dc->mWorkers.mNativeThreadId[idx] = gettid();

    // NOTE(review): every helper zero-fills the same dc->mTlsStruct that
    // init() populated on the creating thread, and all threads share that one
    // record as their TLS value - confirm this sharing is intended.
    memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct));
    int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct);
    if (status) {
        ALOGE("pthread_setspecific %i", status);
    }

    // Disabled experiment: pin each worker to a CPU via a raw syscall.
#if 0
    typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
    cpu_set_t cpuset;
    memset(&cpuset, 0, sizeof(cpuset));
    cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
    int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
                      sizeof(cpuset), &cpuset);
    ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
#endif

    while (!dc->mExit) {
        dc->mWorkers.mLaunchSignals[idx].wait();
        if (dc->mWorkers.mLaunchCallback) {
            // idx +1 is used because the calling thread is always worker 0.
            dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
        }
        // Report this pass as finished; init(), launchThreads() and the
        // destructor all poll mRunningCount for zero.
        __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1);
        dc->mWorkers.mCompleteSignal.set();
    }

    //ALOGV("RS helperThread exited %p idx=%i", dc, idx);
    return NULL;
}
166
launchThreads(WorkerCallback_t cbk,void * data)167 void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
168 mWorkers.mLaunchData = data;
169 mWorkers.mLaunchCallback = cbk;
170
171 // fast path for very small launches
172 MTLaunchStruct *mtls = (MTLaunchStruct *)data;
173 if (mtls && mtls->fep.dimY <= 1 && mtls->xEnd <= mtls->xStart + mtls->mSliceSize) {
174 if (mWorkers.mLaunchCallback) {
175 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
176 }
177 return;
178 }
179
180 mWorkers.mRunningCount = mWorkers.mCount;
181 __sync_synchronize();
182
183 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
184 mWorkers.mLaunchSignals[ct].set();
185 }
186
187 // We use the calling thread as one of the workers so we can start without
188 // the delay of the thread wakeup.
189 if (mWorkers.mLaunchCallback) {
190 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
191 }
192
193 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
194 mWorkers.mCompleteSignal.wait();
195 }
196 }
197
198
lockMutex()199 void RsdCpuReferenceImpl::lockMutex() {
200 pthread_mutex_lock(&gInitMutex);
201 }
202
unlockMutex()203 void RsdCpuReferenceImpl::unlockMutex() {
204 pthread_mutex_unlock(&gInitMutex);
205 }
206
// Reads the start of a file into a caller-supplied buffer with one read().
//
// pathname: file to open read-only.
// buffer:   destination; receives at most buffsize bytes and is NOT
//           NUL-terminated by this function.
// Returns the number of bytes read, or -1 if the file cannot be opened; a
// read() failure other than EINTR is returned as read()'s own negative
// result.
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
    const int fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        return -1;
    }

    int len;
    for (;;) {
        len = read(fd, buffer, buffsize);
        // Only an interrupted call is retried.
        if (len >= 0 || errno != EINTR) {
            break;
        }
    }

    close(fd);
    return len;
}
224
GetCpuInfo()225 static void GetCpuInfo() {
226 char cpuinfo[4096];
227 int cpuinfo_len;
228
229 cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, sizeof cpuinfo);
230 if (cpuinfo_len < 0) /* should not happen */ {
231 return;
232 }
233
234 #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
235 gArchUseSIMD = (!!strstr(cpuinfo, " neon")) ||
236 (!!strstr(cpuinfo, " asimd"));
237 #elif defined(ARCH_X86_HAVE_SSSE3)
238 gArchUseSIMD = !!strstr(cpuinfo, " ssse3");
239 #endif
240 }
241
init(uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn)242 bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
243 sym_lookup_t lfn, script_lookup_t slfn) {
244
245 mSymLookupFn = lfn;
246 mScriptLookupFn = slfn;
247
248 lockMutex();
249 if (!gThreadTLSKeyCount) {
250 int status = pthread_key_create(&gThreadTLSKey, NULL);
251 if (status) {
252 ALOGE("Failed to init thread tls key.");
253 unlockMutex();
254 return false;
255 }
256 }
257 gThreadTLSKeyCount++;
258 unlockMutex();
259
260 mTlsStruct.mContext = mRSC;
261 mTlsStruct.mScript = NULL;
262 int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct);
263 if (status) {
264 ALOGE("pthread_setspecific %i", status);
265 }
266
267 GetCpuInfo();
268
269 int cpu = sysconf(_SC_NPROCESSORS_CONF);
270 if(mRSC->props.mDebugMaxThreads) {
271 cpu = mRSC->props.mDebugMaxThreads;
272 }
273 if (cpu < 2) {
274 mWorkers.mCount = 0;
275 return true;
276 }
277
278 // Subtract one from the cpu count because we also use the command thread as a worker.
279 mWorkers.mCount = (uint32_t)(cpu - 1);
280
281 ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
282
283 mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
284 mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
285 mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
286 mWorkers.mLaunchCallback = NULL;
287
288 mWorkers.mCompleteSignal.init();
289
290 mWorkers.mRunningCount = mWorkers.mCount;
291 mWorkers.mLaunchCount = 0;
292 __sync_synchronize();
293
294 pthread_attr_t threadAttr;
295 status = pthread_attr_init(&threadAttr);
296 if (status) {
297 ALOGE("Failed to init thread attribute.");
298 return false;
299 }
300
301 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
302 status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
303 if (status) {
304 mWorkers.mCount = ct;
305 ALOGE("Created fewer than expected number of RS threads.");
306 break;
307 }
308 }
309 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
310 usleep(100);
311 }
312
313 pthread_attr_destroy(&threadAttr);
314 return true;
315 }
316
317
setPriority(int32_t priority)318 void RsdCpuReferenceImpl::setPriority(int32_t priority) {
319 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
320 setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority);
321 }
322 }
323
~RsdCpuReferenceImpl()324 RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
325 mExit = true;
326 mWorkers.mLaunchData = NULL;
327 mWorkers.mLaunchCallback = NULL;
328 mWorkers.mRunningCount = mWorkers.mCount;
329 __sync_synchronize();
330 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
331 mWorkers.mLaunchSignals[ct].set();
332 }
333 void *res;
334 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
335 pthread_join(mWorkers.mThreadId[ct], &res);
336 }
337 rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0);
338 free(mWorkers.mThreadId);
339 free(mWorkers.mNativeThreadId);
340 delete[] mWorkers.mLaunchSignals;
341
342 // Global structure cleanup.
343 lockMutex();
344 --gThreadTLSKeyCount;
345 if (!gThreadTLSKeyCount) {
346 pthread_key_delete(gThreadTLSKey);
347 }
348 unlockMutex();
349
350 }
351
// Seven-argument kernel function-pointer type. Nothing in the visible part
// of this file refers to it.
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
353
wc_xy(void * usr,uint32_t idx)354 static void wc_xy(void *usr, uint32_t idx) {
355 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
356 RsForEachStubParamStruct p;
357 memcpy(&p, &mtls->fep, sizeof(p));
358 p.lid = idx;
359 uint32_t sig = mtls->sig;
360
361 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
362 while (1) {
363 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
364 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
365 uint32_t yEnd = yStart + mtls->mSliceSize;
366 yEnd = rsMin(yEnd, mtls->yEnd);
367 if (yEnd <= yStart) {
368 return;
369 }
370
371 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
372 //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
373
374 for (p.y = yStart; p.y < yEnd; p.y++) {
375 p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
376 (mtls->fep.eStrideOut * mtls->xStart);
377 p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
378 (mtls->fep.eStrideIn * mtls->xStart);
379 fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
380 }
381 }
382 }
383
wc_x(void * usr,uint32_t idx)384 static void wc_x(void *usr, uint32_t idx) {
385 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
386 RsForEachStubParamStruct p;
387 memcpy(&p, &mtls->fep, sizeof(p));
388 p.lid = idx;
389 uint32_t sig = mtls->sig;
390
391 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
392 while (1) {
393 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
394 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
395 uint32_t xEnd = xStart + mtls->mSliceSize;
396 xEnd = rsMin(xEnd, mtls->xEnd);
397 if (xEnd <= xStart) {
398 return;
399 }
400
401 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
402 //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
403
404 p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
405 p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
406 fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
407 }
408 }
409
launchThreads(const Allocation * ain,Allocation * aout,const RsScriptCall * sc,MTLaunchStruct * mtls)410 void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout,
411 const RsScriptCall *sc, MTLaunchStruct *mtls) {
412
413 //android::StopWatch kernel_time("kernel time");
414
415 if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
416 const size_t targetByteChunk = 16 * 1024;
417 mInForEach = true;
418 if (mtls->fep.dimY > 1) {
419 uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
420 uint32_t s2 = 0;
421
422 // This chooses our slice size to rate limit atomic ops to
423 // one per 16k bytes of reads/writes.
424 if (mtls->fep.yStrideOut) {
425 s2 = targetByteChunk / mtls->fep.yStrideOut;
426 } else {
427 s2 = targetByteChunk / mtls->fep.yStrideIn;
428 }
429 mtls->mSliceSize = rsMin(s1, s2);
430
431 if(mtls->mSliceSize < 1) {
432 mtls->mSliceSize = 1;
433 }
434
435 // mtls->mSliceSize = 2;
436 launchThreads(wc_xy, mtls);
437 } else {
438 uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
439 uint32_t s2 = 0;
440
441 // This chooses our slice size to rate limit atomic ops to
442 // one per 16k bytes of reads/writes.
443 if (mtls->fep.eStrideOut) {
444 s2 = targetByteChunk / mtls->fep.eStrideOut;
445 } else {
446 s2 = targetByteChunk / mtls->fep.eStrideIn;
447 }
448 mtls->mSliceSize = rsMin(s1, s2);
449
450 if(mtls->mSliceSize < 1) {
451 mtls->mSliceSize = 1;
452 }
453
454 launchThreads(wc_x, mtls);
455 }
456 mInForEach = false;
457
458 //ALOGE("launch 1");
459 } else {
460 RsForEachStubParamStruct p;
461 memcpy(&p, &mtls->fep, sizeof(p));
462 uint32_t sig = mtls->sig;
463
464 //ALOGE("launch 3");
465 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
466 for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
467 for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
468 for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
469 uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
470 mtls->fep.dimY * p.z + p.y;
471 p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
472 (mtls->fep.eStrideOut * mtls->xStart);
473 p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
474 (mtls->fep.eStrideIn * mtls->xStart);
475 fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
476 }
477 }
478 }
479 }
480 }
481
launchThreads(const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc,MTLaunchStruct * mtls)482 void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout,
483 const RsScriptCall* sc, MTLaunchStruct* mtls) {
484
485 //android::StopWatch kernel_time("kernel time");
486
487 if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
488 const size_t targetByteChunk = 16 * 1024;
489 mInForEach = true;
490 if (mtls->fep.dimY > 1) {
491 uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
492 uint32_t s2 = 0;
493
494 // This chooses our slice size to rate limit atomic ops to
495 // one per 16k bytes of reads/writes.
496 if (mtls->fep.yStrideOut) {
497 s2 = targetByteChunk / mtls->fep.yStrideOut;
498 } else {
499 s2 = targetByteChunk / mtls->fep.yStrideIn;
500 }
501 mtls->mSliceSize = rsMin(s1, s2);
502
503 if(mtls->mSliceSize < 1) {
504 mtls->mSliceSize = 1;
505 }
506
507 // mtls->mSliceSize = 2;
508 launchThreads(wc_xy, mtls);
509 } else {
510 uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
511 uint32_t s2 = 0;
512
513 // This chooses our slice size to rate limit atomic ops to
514 // one per 16k bytes of reads/writes.
515 if (mtls->fep.eStrideOut) {
516 s2 = targetByteChunk / mtls->fep.eStrideOut;
517 } else {
518 s2 = targetByteChunk / mtls->fep.eStrideIn;
519 }
520 mtls->mSliceSize = rsMin(s1, s2);
521
522 if (mtls->mSliceSize < 1) {
523 mtls->mSliceSize = 1;
524 }
525
526 launchThreads(wc_x, mtls);
527 }
528 mInForEach = false;
529
530 //ALOGE("launch 1");
531 } else {
532 RsForEachStubParamStruct p;
533 memcpy(&p, &mtls->fep, sizeof(p));
534 uint32_t sig = mtls->sig;
535
536 // Allocate space for our input base pointers.
537 p.ins = new const void*[inLen];
538
539 // Allocate space for our input stride information.
540 p.eStrideIns = new uint32_t[inLen];
541
542 // Fill our stride information.
543 for (int index = inLen; --index >= 0;) {
544 p.eStrideIns[index] = mtls->fep.inStrides[index].eStride;
545 }
546
547 //ALOGE("launch 3");
548 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
549 uint32_t offset_invariant = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0];
550
551 for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
552 uint32_t offset_part = offset_invariant * p.ar[0];
553
554 for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
555 for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
556 uint32_t offset = offset_part + mtls->fep.dimY * p.z + p.y;
557
558 p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
559 (mtls->fep.eStrideOut * mtls->xStart);
560
561 for (int index = inLen; --index >= 0;) {
562 StridePair &strides = mtls->fep.inStrides[index];
563
564 p.ins[index] = mtls->fep.ptrIns[index] +
565 (strides.yStride * offset) +
566 (strides.eStride * mtls->xStart);
567 }
568
569 /*
570 * The fourth argument is zero here because multi-input
571 * kernels get their stride information from a member of p
572 * that points to an array.
573 */
574 fn(&p, mtls->xStart, mtls->xEnd, 0, mtls->fep.eStrideOut);
575 }
576 }
577 }
578
579 // Free our arrays.
580 delete[] p.ins;
581 delete[] p.eStrideIns;
582 }
583 }
584
setTLS(RsdCpuScriptImpl * sc)585 RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) {
586 //ALOGE("setTls %p", sc);
587 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
588 rsAssert(tls);
589 RsdCpuScriptImpl *old = tls->mImpl;
590 tls->mImpl = sc;
591 tls->mContext = mRSC;
592 if (sc) {
593 tls->mScript = sc->getScript();
594 } else {
595 tls->mScript = NULL;
596 }
597 return old;
598 }
599
symLookup(const char * name)600 const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) {
601 return mSymLookupFn(mRSC, name);
602 }
603
604
createScript(const ScriptC * s,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)605 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s,
606 char const *resName, char const *cacheDir,
607 uint8_t const *bitcode, size_t bitcodeSize,
608 uint32_t flags) {
609
610 RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s);
611 if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags
612 #ifndef RS_COMPATIBILITY_LIB
613 , getBccPluginName()
614 #endif
615 )) {
616 delete i;
617 return NULL;
618 }
619 return i;
620 }
621
// Factory functions for the built-in intrinsic scripts. They are only
// declared here (their definitions are not in this file) and are dispatched
// by intrinsic ID from RsdCpuReferenceImpl::createIntrinsic(). Each takes the
// owning CPU reference, the Script and an Element, and returns the
// intrinsic's RsdCpuScriptImpl.
extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
                                             const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
                                                   const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
                                                   const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx,
                                           const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx,
                                                   const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx,
                                            const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx,
                                                const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
                                             const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx,
                                                 const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx,
                                              const Script *s, const Element *e);
642
createIntrinsic(const Script * s,RsScriptIntrinsicID iid,Element * e)643 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s,
644 RsScriptIntrinsicID iid, Element *e) {
645
646 RsdCpuScriptImpl *i = NULL;
647 switch (iid) {
648 case RS_SCRIPT_INTRINSIC_ID_3DLUT:
649 i = rsdIntrinsic_3DLUT(this, s, e);
650 break;
651 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
652 i = rsdIntrinsic_Convolve3x3(this, s, e);
653 break;
654 case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX:
655 i = rsdIntrinsic_ColorMatrix(this, s, e);
656 break;
657 case RS_SCRIPT_INTRINSIC_ID_LUT:
658 i = rsdIntrinsic_LUT(this, s, e);
659 break;
660 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5:
661 i = rsdIntrinsic_Convolve5x5(this, s, e);
662 break;
663 case RS_SCRIPT_INTRINSIC_ID_BLUR:
664 i = rsdIntrinsic_Blur(this, s, e);
665 break;
666 case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB:
667 i = rsdIntrinsic_YuvToRGB(this, s, e);
668 break;
669 case RS_SCRIPT_INTRINSIC_ID_BLEND:
670 i = rsdIntrinsic_Blend(this, s, e);
671 break;
672 case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM:
673 i = rsdIntrinsic_Histogram(this, s, e);
674 break;
675 case RS_SCRIPT_INTRINSIC_ID_RESIZE:
676 i = rsdIntrinsic_Resize(this, s, e);
677 break;
678
679 default:
680 rsAssert(0);
681 }
682
683 return i;
684 }
685
createScriptGroup(const ScriptGroup * sg)686 RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) {
687 CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
688 if (!sgi->init()) {
689 delete sgi;
690 return NULL;
691 }
692 return sgi;
693 }
694