• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuScriptGroup.h"
20 
21 #include <malloc.h>
22 #include "rsContext.h"
23 
24 #include <sys/types.h>
25 #include <sys/resource.h>
26 #include <sched.h>
27 #include <sys/syscall.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <fcntl.h>
34 
35 #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
36 #include <cutils/properties.h>
37 #include "utils/StopWatch.h"
38 #endif
39 
40 #ifdef RS_SERVER
41 // Android exposes gettid(), standard Linux does not
gettid()42 static pid_t gettid() {
43     return syscall(SYS_gettid);
44 }
45 #endif
46 
47 using namespace android;
48 using namespace android::renderscript;
49 
50 typedef void (*outer_foreach_t)(
51     const android::renderscript::RsForEachStubParamStruct *,
52     uint32_t x1, uint32_t x2,
53     uint32_t instep, uint32_t outstep);
54 
55 
56 static pthread_key_t gThreadTLSKey = 0;
57 static uint32_t gThreadTLSKeyCount = 0;
58 static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;
59 
60 bool android::renderscript::gArchUseSIMD = false;
61 
~RsdCpuReference()62 RsdCpuReference::~RsdCpuReference() {
63 }
64 
create(Context * rsc,uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn,bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,RSSelectRTCallback pSelectRTCallback)65 RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major,
66         uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
67 #ifndef RS_COMPATIBILITY_LIB
68         , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,
69         RSSelectRTCallback pSelectRTCallback
70 #endif
71         ) {
72 
73     RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc);
74     if (!cpu) {
75         return NULL;
76     }
77     if (!cpu->init(version_major, version_minor, lfn, slfn)) {
78         delete cpu;
79         return NULL;
80     }
81 
82 #ifndef RS_COMPATIBILITY_LIB
83     cpu->setLinkRuntimeCallback(pLinkRuntimeCallback);
84     cpu->setSelectRTCallback(pSelectRTCallback);
85 #endif
86 
87     return cpu;
88 }
89 
90 
getTlsContext()91 Context * RsdCpuReference::getTlsContext() {
92     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
93     return tls->mContext;
94 }
95 
getTlsScript()96 const Script * RsdCpuReference::getTlsScript() {
97     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
98     return tls->mScript;
99 }
100 
getThreadTLSKey()101 pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; }
102 
103 ////////////////////////////////////////////////////////////
104 ///
105 
RsdCpuReferenceImpl(Context * rsc)106 RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) {
107     mRSC = rsc;
108 
109     version_major = 0;
110     version_minor = 0;
111     mInForEach = false;
112     memset(&mWorkers, 0, sizeof(mWorkers));
113     memset(&mTlsStruct, 0, sizeof(mTlsStruct));
114     mExit = false;
115 #ifndef RS_COMPATIBILITY_LIB
116     mLinkRuntimeCallback = NULL;
117     mSelectRTCallback = NULL;
118     mSetupCompilerCallback = NULL;
119 #endif
120 }
121 
122 
helperThreadProc(void * vrsc)123 void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
124     RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc;
125 
126     uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1);
127 
128     //ALOGV("RS helperThread starting %p idx=%i", dc, idx);
129 
130     dc->mWorkers.mLaunchSignals[idx].init();
131     dc->mWorkers.mNativeThreadId[idx] = gettid();
132 
133     memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct));
134     int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct);
135     if (status) {
136         ALOGE("pthread_setspecific %i", status);
137     }
138 
139 #if 0
140     typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
141     cpu_set_t cpuset;
142     memset(&cpuset, 0, sizeof(cpuset));
143     cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
144     int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
145               sizeof(cpuset), &cpuset);
146     ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
147 #endif
148 
149     while (!dc->mExit) {
150         dc->mWorkers.mLaunchSignals[idx].wait();
151         if (dc->mWorkers.mLaunchCallback) {
152            // idx +1 is used because the calling thread is always worker 0.
153            dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
154         }
155         __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1);
156         dc->mWorkers.mCompleteSignal.set();
157     }
158 
159     //ALOGV("RS helperThread exited %p idx=%i", dc, idx);
160     return NULL;
161 }
162 
launchThreads(WorkerCallback_t cbk,void * data)163 void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
164     mWorkers.mLaunchData = data;
165     mWorkers.mLaunchCallback = cbk;
166 
167     // fast path for very small launches
168     MTLaunchStruct *mtls = (MTLaunchStruct *)data;
169     if (mtls && mtls->fep.dimY <= 1 && mtls->xEnd <= mtls->xStart + mtls->mSliceSize) {
170         if (mWorkers.mLaunchCallback) {
171             mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
172         }
173         return;
174     }
175 
176     mWorkers.mRunningCount = mWorkers.mCount;
177     __sync_synchronize();
178 
179     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
180         mWorkers.mLaunchSignals[ct].set();
181     }
182 
183     // We use the calling thread as one of the workers so we can start without
184     // the delay of the thread wakeup.
185     if (mWorkers.mLaunchCallback) {
186         mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
187     }
188 
189     while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
190         mWorkers.mCompleteSignal.wait();
191     }
192 }
193 
194 
lockMutex()195 void RsdCpuReferenceImpl::lockMutex() {
196     pthread_mutex_lock(&gInitMutex);
197 }
198 
unlockMutex()199 void RsdCpuReferenceImpl::unlockMutex() {
200     pthread_mutex_unlock(&gInitMutex);
201 }
202 
203 #if defined(ARCH_ARM_HAVE_VFP)
// Reads up to |buffsize| bytes from the start of |pathname| into |buffer| with
// a single read() call, retried only when interrupted (EINTR).
// Returns the number of bytes read, or a negative value if the file cannot be
// opened or the read fails. The buffer is NOT NUL-terminated.
static int
read_file(const char*  pathname, char*  buffer, size_t  buffsize)
{
    const int fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        return -1;
    }

    int got;
    for (;;) {
        got = read(fd, buffer, buffsize);
        if (got >= 0 || errno != EINTR) {
            break;
        }
    }

    close(fd);
    return got;
}
221 
GetCpuInfo()222 static void GetCpuInfo() {
223     char cpuinfo[4096];
224     int  cpuinfo_len;
225 
226     cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, sizeof cpuinfo);
227     if (cpuinfo_len < 0)  /* should not happen */ {
228         return;
229     }
230 
231     gArchUseSIMD = !!strstr(cpuinfo, " neon");
232 }
233 #endif // ARCH_ARM_HAVE_VFP
234 
init(uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn)235 bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
236                                sym_lookup_t lfn, script_lookup_t slfn) {
237 
238     mSymLookupFn = lfn;
239     mScriptLookupFn = slfn;
240 
241     lockMutex();
242     if (!gThreadTLSKeyCount) {
243         int status = pthread_key_create(&gThreadTLSKey, NULL);
244         if (status) {
245             ALOGE("Failed to init thread tls key.");
246             unlockMutex();
247             return false;
248         }
249     }
250     gThreadTLSKeyCount++;
251     unlockMutex();
252 
253     mTlsStruct.mContext = mRSC;
254     mTlsStruct.mScript = NULL;
255     int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct);
256     if (status) {
257         ALOGE("pthread_setspecific %i", status);
258     }
259 
260 #if defined(ARCH_ARM_HAVE_VFP)
261     GetCpuInfo();
262 #endif
263 
264     int cpu = sysconf(_SC_NPROCESSORS_ONLN);
265     if(mRSC->props.mDebugMaxThreads) {
266         cpu = mRSC->props.mDebugMaxThreads;
267     }
268     if (cpu < 2) {
269         mWorkers.mCount = 0;
270         return true;
271     }
272 
273     // Subtract one from the cpu count because we also use the command thread as a worker.
274     mWorkers.mCount = (uint32_t)(cpu - 1);
275 
276     ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
277 
278     mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
279     mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
280     mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
281     mWorkers.mLaunchCallback = NULL;
282 
283     mWorkers.mCompleteSignal.init();
284 
285     mWorkers.mRunningCount = mWorkers.mCount;
286     mWorkers.mLaunchCount = 0;
287     __sync_synchronize();
288 
289     pthread_attr_t threadAttr;
290     status = pthread_attr_init(&threadAttr);
291     if (status) {
292         ALOGE("Failed to init thread attribute.");
293         return false;
294     }
295 
296     for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
297         status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
298         if (status) {
299             mWorkers.mCount = ct;
300             ALOGE("Created fewer than expected number of RS threads.");
301             break;
302         }
303     }
304     while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
305         usleep(100);
306     }
307 
308     pthread_attr_destroy(&threadAttr);
309     return true;
310 }
311 
312 
setPriority(int32_t priority)313 void RsdCpuReferenceImpl::setPriority(int32_t priority) {
314     for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
315         setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority);
316     }
317 }
318 
~RsdCpuReferenceImpl()319 RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
320     mExit = true;
321     mWorkers.mLaunchData = NULL;
322     mWorkers.mLaunchCallback = NULL;
323     mWorkers.mRunningCount = mWorkers.mCount;
324     __sync_synchronize();
325     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
326         mWorkers.mLaunchSignals[ct].set();
327     }
328     void *res;
329     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
330         pthread_join(mWorkers.mThreadId[ct], &res);
331     }
332     rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0);
333 
334     // Global structure cleanup.
335     lockMutex();
336     --gThreadTLSKeyCount;
337     if (!gThreadTLSKeyCount) {
338         pthread_key_delete(gThreadTLSKey);
339     }
340     unlockMutex();
341 
342 }
343 
// Function-pointer type taking three untyped pointers followed by four
// 32-bit values. NOTE(review): not referenced in the visible part of this
// file - confirm whether it is still needed.
typedef void (*rs_t)(const void *, void *, const void *,
                     uint32_t, uint32_t, uint32_t, uint32_t);
345 
wc_xy(void * usr,uint32_t idx)346 static void wc_xy(void *usr, uint32_t idx) {
347     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
348     RsForEachStubParamStruct p;
349     memcpy(&p, &mtls->fep, sizeof(p));
350     p.lid = idx;
351     uint32_t sig = mtls->sig;
352 
353     outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
354     while (1) {
355         uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
356         uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
357         uint32_t yEnd = yStart + mtls->mSliceSize;
358         yEnd = rsMin(yEnd, mtls->yEnd);
359         if (yEnd <= yStart) {
360             return;
361         }
362 
363         //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
364         //ALOGE("usr ptr in %p,  out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
365 
366         for (p.y = yStart; p.y < yEnd; p.y++) {
367             p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
368                     (mtls->fep.eStrideOut * mtls->xStart);
369             p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
370                    (mtls->fep.eStrideIn * mtls->xStart);
371             fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
372         }
373     }
374 }
375 
wc_x(void * usr,uint32_t idx)376 static void wc_x(void *usr, uint32_t idx) {
377     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
378     RsForEachStubParamStruct p;
379     memcpy(&p, &mtls->fep, sizeof(p));
380     p.lid = idx;
381     uint32_t sig = mtls->sig;
382 
383     outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
384     while (1) {
385         uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
386         uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
387         uint32_t xEnd = xStart + mtls->mSliceSize;
388         xEnd = rsMin(xEnd, mtls->xEnd);
389         if (xEnd <= xStart) {
390             return;
391         }
392 
393         //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
394         //ALOGE("usr ptr in %p,  out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
395 
396         p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
397         p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
398         fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
399     }
400 }
401 
launchThreads(const Allocation * ain,Allocation * aout,const RsScriptCall * sc,MTLaunchStruct * mtls)402 void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout,
403                                      const RsScriptCall *sc, MTLaunchStruct *mtls) {
404 
405     //android::StopWatch kernel_time("kernel time");
406 
407     if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
408         const size_t targetByteChunk = 16 * 1024;
409         mInForEach = true;
410         if (mtls->fep.dimY > 1) {
411             uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
412             uint32_t s2 = 0;
413 
414             // This chooses our slice size to rate limit atomic ops to
415             // one per 16k bytes of reads/writes.
416             if (mtls->fep.yStrideOut) {
417                 s2 = targetByteChunk / mtls->fep.yStrideOut;
418             } else {
419                 s2 = targetByteChunk / mtls->fep.yStrideIn;
420             }
421             mtls->mSliceSize = rsMin(s1, s2);
422 
423             if(mtls->mSliceSize < 1) {
424                 mtls->mSliceSize = 1;
425             }
426 
427          //   mtls->mSliceSize = 2;
428             launchThreads(wc_xy, mtls);
429         } else {
430             uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
431             uint32_t s2 = 0;
432 
433             // This chooses our slice size to rate limit atomic ops to
434             // one per 16k bytes of reads/writes.
435             if (mtls->fep.eStrideOut) {
436                 s2 = targetByteChunk / mtls->fep.eStrideOut;
437             } else {
438                 s2 = targetByteChunk / mtls->fep.eStrideIn;
439             }
440             mtls->mSliceSize = rsMin(s1, s2);
441 
442             if(mtls->mSliceSize < 1) {
443                 mtls->mSliceSize = 1;
444             }
445 
446             launchThreads(wc_x, mtls);
447         }
448         mInForEach = false;
449 
450         //ALOGE("launch 1");
451     } else {
452         RsForEachStubParamStruct p;
453         memcpy(&p, &mtls->fep, sizeof(p));
454         uint32_t sig = mtls->sig;
455 
456         //ALOGE("launch 3");
457         outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
458         for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
459             for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
460                 for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
461                     uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
462                                       mtls->fep.dimY * p.z + p.y;
463                     p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
464                             (mtls->fep.eStrideOut * mtls->xStart);
465                     p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
466                            (mtls->fep.eStrideIn * mtls->xStart);
467                     fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
468                 }
469             }
470         }
471     }
472 }
473 
setTLS(RsdCpuScriptImpl * sc)474 RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) {
475     //ALOGE("setTls %p", sc);
476     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
477     rsAssert(tls);
478     RsdCpuScriptImpl *old = tls->mImpl;
479     tls->mImpl = sc;
480     tls->mContext = mRSC;
481     if (sc) {
482         tls->mScript = sc->getScript();
483     } else {
484         tls->mScript = NULL;
485     }
486     return old;
487 }
488 
symLookup(const char * name)489 const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) {
490     return mSymLookupFn(mRSC, name);
491 }
492 
493 
createScript(const ScriptC * s,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)494 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s,
495                                     char const *resName, char const *cacheDir,
496                                     uint8_t const *bitcode, size_t bitcodeSize,
497                                     uint32_t flags) {
498 
499     RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s);
500     if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags)) {
501         delete i;
502         return NULL;
503     }
504     return i;
505 }
506 
507 extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
508                                              const Script *s, const Element *e);
509 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
510                                                    const Script *s, const Element *e);
511 extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
512                                                    const Script *s, const Element *e);
513 extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx,
514                                            const Script *s, const Element *e);
515 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx,
516                                                    const Script *s, const Element *e);
517 extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx,
518                                             const Script *s, const Element *e);
519 extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx,
520                                                 const Script *s, const Element *e);
521 extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
522                                              const Script *s, const Element *e);
523 extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx,
524                                                  const Script *s, const Element *e);
525 
createIntrinsic(const Script * s,RsScriptIntrinsicID iid,Element * e)526 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s,
527                                     RsScriptIntrinsicID iid, Element *e) {
528 
529     RsdCpuScriptImpl *i = NULL;
530     switch (iid) {
531     case RS_SCRIPT_INTRINSIC_ID_3DLUT:
532         i = rsdIntrinsic_3DLUT(this, s, e);
533         break;
534     case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
535         i = rsdIntrinsic_Convolve3x3(this, s, e);
536         break;
537     case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX:
538         i = rsdIntrinsic_ColorMatrix(this, s, e);
539         break;
540     case RS_SCRIPT_INTRINSIC_ID_LUT:
541         i = rsdIntrinsic_LUT(this, s, e);
542         break;
543     case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5:
544         i = rsdIntrinsic_Convolve5x5(this, s, e);
545         break;
546     case RS_SCRIPT_INTRINSIC_ID_BLUR:
547         i = rsdIntrinsic_Blur(this, s, e);
548         break;
549     case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB:
550         i = rsdIntrinsic_YuvToRGB(this, s, e);
551         break;
552     case RS_SCRIPT_INTRINSIC_ID_BLEND:
553         i = rsdIntrinsic_Blend(this, s, e);
554         break;
555     case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM:
556         i = rsdIntrinsic_Histogram(this, s, e);
557         break;
558 
559     default:
560         rsAssert(0);
561     }
562 
563     return i;
564 }
565 
createScriptGroup(const ScriptGroup * sg)566 RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) {
567     CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
568     if (!sgi->init()) {
569         delete sgi;
570         return NULL;
571     }
572     return sgi;
573 }
574 
575 
576