• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21 //#include "rsdBcc.h"
22 //#include "rsdAllocation.h"
23 
24 using namespace android;
25 using namespace android::renderscript;
26 
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroup * sg)27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28     mCtx = ctx;
29     mSG = sg;
30 }
31 
~CpuScriptGroupImpl()32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33 
34 }
35 
init()36 bool CpuScriptGroupImpl::init() {
37     return true;
38 }
39 
setInput(const ScriptKernelID * kid,Allocation * a)40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41 }
42 
setOutput(const ScriptKernelID * kid,Allocation * a)43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44 }
45 
46 
47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
48                                       uint32_t xstart, uint32_t xend,
49                                       uint32_t instep, uint32_t outstep);
50 
scriptGroupRoot(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
52                                          uint32_t xstart, uint32_t xend,
53                                          uint32_t instep, uint32_t outstep) {
54 
55 
56     const ScriptList *sl = (const ScriptList *)p->usr;
57     RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
58     const void *oldUsr = p->usr;
59 
60     for(size_t ct=0; ct < sl->count; ct++) {
61         ScriptGroupRootFunc_t func;
62         func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
63         mp->usr = sl->usrPtrs[ct];
64 
65         mp->ptrIn = NULL;
66         mp->in = NULL;
67         mp->ptrOut = NULL;
68         mp->out = NULL;
69 
70         uint32_t istep = 0;
71         uint32_t ostep = 0;
72 
73         if (sl->ins[ct]) {
74             mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
75             istep = sl->ins[ct]->mHal.state.elementSizeBytes;
76             mp->in = mp->ptrIn;
77             if (sl->inExts[ct]) {
78                 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
79             } else {
80                 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
81                     mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
82                 }
83             }
84         }
85 
86         if (sl->outs[ct]) {
87             mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
88             mp->out = mp->ptrOut;
89             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
90             if (sl->outExts[ct]) {
91                 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
92             } else {
93                 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
94                     mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
95                 }
96             }
97         }
98 
99         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
100         func(p, xstart, xend, istep, ostep);
101     }
102     //ALOGE("script group root");
103 
104     //ConvolveParams *cp = (ConvolveParams *)p->usr;
105 
106     mp->usr = oldUsr;
107 }
108 
109 
110 
execute()111 void CpuScriptGroupImpl::execute() {
112     Vector<Allocation *> ins;
113     Vector<bool> inExts;
114     Vector<Allocation *> outs;
115     Vector<bool> outExts;
116     Vector<const ScriptKernelID *> kernels;
117     bool fieldDep = false;
118 
119     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
120         ScriptGroup::Node *n = mSG->mNodes[ct];
121         Script *s = n->mKernels[0]->mScript;
122         if (s->hasObjectSlots()) {
123             // Disable the ScriptGroup optimization if we have global RS
124             // objects that might interfere between kernels.
125             fieldDep = true;
126         }
127 
128         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
129 
130         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
131             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
132                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
133                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
134             }
135         }
136 
137         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
138             const ScriptKernelID *k = n->mKernels[ct2];
139             Allocation *ain = NULL;
140             Allocation *aout = NULL;
141             bool inExt = false;
142             bool outExt = false;
143 
144             if (k->mScript->hasObjectSlots()) {
145                 // Disable the ScriptGroup optimization if we have global RS
146                 // objects that might interfere between kernels.
147                 fieldDep = true;
148             }
149 
150             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
151                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
152                     ain = n->mInputs[ct3]->mAlloc.get();
153                     //ALOGE(" link in %p", ain);
154                 }
155             }
156             for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
157                 if (mSG->mInputs[ct3]->mKernel == k) {
158                     ain = mSG->mInputs[ct3]->mAlloc.get();
159                     inExt = true;
160                     //ALOGE(" io in %p", ain);
161                 }
162             }
163 
164             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
165                 if (n->mOutputs[ct3]->mSource.get() == k) {
166                     aout = n->mOutputs[ct3]->mAlloc.get();
167                     if(n->mOutputs[ct3]->mDstField.get() != NULL) {
168                         fieldDep = true;
169                     }
170                     //ALOGE(" link out %p", aout);
171                 }
172             }
173             for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
174                 if (mSG->mOutputs[ct3]->mKernel == k) {
175                     aout = mSG->mOutputs[ct3]->mAlloc.get();
176                     outExt = true;
177                     //ALOGE(" io out %p", aout);
178                 }
179             }
180 
181             if ((k->mHasKernelOutput == (aout != NULL)) &&
182                 (k->mHasKernelInput == (ain != NULL))) {
183                 ins.add(ain);
184                 inExts.add(inExt);
185                 outs.add(aout);
186                 outExts.add(outExt);
187                 kernels.add(k);
188             }
189         }
190 
191     }
192 
193     MTLaunchStruct mtls;
194 
195     if(fieldDep) {
196         for (size_t ct=0; ct < ins.size(); ct++) {
197             Script *s = kernels[ct]->mScript;
198             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
199             uint32_t slot = kernels[ct]->mSlot;
200 
201             si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
202             si->forEachKernelSetup(slot, &mtls);
203             si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
204             mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
205             si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
206         }
207     } else {
208         ScriptList sl;
209         sl.ins = ins.array();
210         sl.outs = outs.array();
211         sl.kernels = kernels.array();
212         sl.count = kernels.size();
213 
214         Vector<const void *> usrPtrs;
215         Vector<const void *> fnPtrs;
216         Vector<uint32_t> sigs;
217         for (size_t ct=0; ct < kernels.size(); ct++) {
218             Script *s = kernels[ct]->mScript;
219             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
220 
221             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
222             fnPtrs.add((void *)mtls.kernel);
223             usrPtrs.add(mtls.fep.usr);
224             sigs.add(mtls.fep.usrLen);
225             si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
226         }
227         sl.sigs = sigs.array();
228         sl.usrPtrs = usrPtrs.array();
229         sl.fnPtrs = fnPtrs.array();
230         sl.inExts = inExts.array();
231         sl.outExts = outExts.array();
232 
233         Script *s = kernels[0]->mScript;
234         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
235         si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
236         mtls.script = NULL;
237         mtls.kernel = (void (*)())&scriptGroupRoot;
238         mtls.fep.usr = &sl;
239         mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
240 
241         for (size_t ct=0; ct < kernels.size(); ct++) {
242             Script *s = kernels[ct]->mScript;
243             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
244             si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
245         }
246     }
247 }
248 
249 
250