1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21 //#include "rsdBcc.h"
22 //#include "rsdAllocation.h"
23
24 using namespace android;
25 using namespace android::renderscript;
26
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroup * sg)27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28 mCtx = ctx;
29 mSG = sg;
30 }
31
~CpuScriptGroupImpl()32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33
34 }
35
init()36 bool CpuScriptGroupImpl::init() {
37 return true;
38 }
39
setInput(const ScriptKernelID * kid,Allocation * a)40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41 }
42
setOutput(const ScriptKernelID * kid,Allocation * a)43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44 }
45
46
47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
48 uint32_t xstart, uint32_t xend,
49 uint32_t instep, uint32_t outstep);
50
scriptGroupRoot(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
52 uint32_t xstart, uint32_t xend,
53 uint32_t instep, uint32_t outstep) {
54
55
56 const ScriptList *sl = (const ScriptList *)p->usr;
57 RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
58 const void *oldUsr = p->usr;
59
60 for(size_t ct=0; ct < sl->count; ct++) {
61 ScriptGroupRootFunc_t func;
62 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
63 mp->usr = sl->usrPtrs[ct];
64
65 mp->ptrIn = NULL;
66 mp->in = NULL;
67 mp->ptrOut = NULL;
68 mp->out = NULL;
69
70 uint32_t istep = 0;
71 uint32_t ostep = 0;
72
73 if (sl->ins[ct]) {
74 mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
75 istep = sl->ins[ct]->mHal.state.elementSizeBytes;
76 mp->in = mp->ptrIn;
77 if (sl->inExts[ct]) {
78 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
79 } else {
80 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
81 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
82 }
83 }
84 }
85
86 if (sl->outs[ct]) {
87 mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
88 mp->out = mp->ptrOut;
89 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
90 if (sl->outExts[ct]) {
91 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
92 } else {
93 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
94 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
95 }
96 }
97 }
98
99 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
100 func(p, xstart, xend, istep, ostep);
101 }
102 //ALOGE("script group root");
103
104 //ConvolveParams *cp = (ConvolveParams *)p->usr;
105
106 mp->usr = oldUsr;
107 }
108
109
110
execute()111 void CpuScriptGroupImpl::execute() {
112 Vector<Allocation *> ins;
113 Vector<bool> inExts;
114 Vector<Allocation *> outs;
115 Vector<bool> outExts;
116 Vector<const ScriptKernelID *> kernels;
117 bool fieldDep = false;
118
119 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
120 ScriptGroup::Node *n = mSG->mNodes[ct];
121 Script *s = n->mKernels[0]->mScript;
122 if (s->hasObjectSlots()) {
123 // Disable the ScriptGroup optimization if we have global RS
124 // objects that might interfere between kernels.
125 fieldDep = true;
126 }
127
128 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
129
130 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
131 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
132 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
133 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
134 }
135 }
136
137 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
138 const ScriptKernelID *k = n->mKernels[ct2];
139 Allocation *ain = NULL;
140 Allocation *aout = NULL;
141 bool inExt = false;
142 bool outExt = false;
143
144 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
145 if (n->mInputs[ct3]->mDstKernel.get() == k) {
146 ain = n->mInputs[ct3]->mAlloc.get();
147 break;
148 }
149 }
150 if (ain == NULL) {
151 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
152 if (mSG->mInputs[ct3]->mKernel == k) {
153 ain = mSG->mInputs[ct3]->mAlloc.get();
154 inExt = true;
155 break;
156 }
157 }
158 }
159
160 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
161 if (n->mOutputs[ct3]->mSource.get() == k) {
162 aout = n->mOutputs[ct3]->mAlloc.get();
163 if(n->mOutputs[ct3]->mDstField.get() != NULL) {
164 fieldDep = true;
165 }
166 break;
167 }
168 }
169 if (aout == NULL) {
170 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
171 if (mSG->mOutputs[ct3]->mKernel == k) {
172 aout = mSG->mOutputs[ct3]->mAlloc.get();
173 outExt = true;
174 break;
175 }
176 }
177 }
178
179 rsAssert((k->mHasKernelOutput == (aout != NULL)) &&
180 (k->mHasKernelInput == (ain != NULL)));
181
182 ins.add(ain);
183 inExts.add(inExt);
184 outs.add(aout);
185 outExts.add(outExt);
186 kernels.add(k);
187 }
188
189 }
190
191 MTLaunchStruct mtls;
192
193 if(fieldDep) {
194 for (size_t ct=0; ct < ins.size(); ct++) {
195 Script *s = kernels[ct]->mScript;
196 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
197 uint32_t slot = kernels[ct]->mSlot;
198
199 si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
200 si->forEachKernelSetup(slot, &mtls);
201 si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
202 mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
203 si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
204 }
205 } else {
206 ScriptList sl;
207 sl.ins = ins.array();
208 sl.outs = outs.array();
209 sl.kernels = kernels.array();
210 sl.count = kernels.size();
211
212 Vector<const void *> usrPtrs;
213 Vector<const void *> fnPtrs;
214 Vector<uint32_t> sigs;
215 for (size_t ct=0; ct < kernels.size(); ct++) {
216 Script *s = kernels[ct]->mScript;
217 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
218
219 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
220 fnPtrs.add((void *)mtls.kernel);
221 usrPtrs.add(mtls.fep.usr);
222 sigs.add(mtls.fep.usrLen);
223 si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
224 }
225 sl.sigs = sigs.array();
226 sl.usrPtrs = usrPtrs.array();
227 sl.fnPtrs = fnPtrs.array();
228 sl.inExts = inExts.array();
229 sl.outExts = outExts.array();
230
231 Script *s = kernels[0]->mScript;
232 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
233 si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
234 mtls.script = NULL;
235 mtls.kernel = (void (*)())&scriptGroupRoot;
236 mtls.fep.usr = &sl;
237 mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
238
239 for (size_t ct=0; ct < kernels.size(); ct++) {
240 Script *s = kernels[ct]->mScript;
241 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
242 si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
243 }
244 }
245 }
246
247
248