1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuScriptGroup.h"
20
21 #ifndef RS_SERVER
22 #include <bcc/BCCContext.h>
23 #include <bcc/Renderscript/RSCompilerDriver.h>
24 #include <bcc/Renderscript/RSExecutable.h>
25 #include <bcc/Renderscript/RSInfo.h>
26 #endif
27
28 #include "rsScript.h"
29 #include "rsScriptGroup.h"
30 #include "rsCpuScriptGroup.h"
31 //#include "rsdBcc.h"
32 //#include "rsdAllocation.h"
33
34 using namespace android;
35 using namespace android::renderscript;
36
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroup * sg)37 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
38 mCtx = ctx;
39 mSG = sg;
40 }
41
~CpuScriptGroupImpl()42 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
43
44 }
45
init()46 bool CpuScriptGroupImpl::init() {
47 return true;
48 }
49
setInput(const ScriptKernelID * kid,Allocation * a)50 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
51 }
52
setOutput(const ScriptKernelID * kid,Allocation * a)53 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
54 }
55
56
57 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
58 uint32_t xstart, uint32_t xend,
59 uint32_t instep, uint32_t outstep);
60
scriptGroupRoot(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)61 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
62 uint32_t xstart, uint32_t xend,
63 uint32_t instep, uint32_t outstep) {
64
65
66 const ScriptList *sl = (const ScriptList *)p->usr;
67 RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
68 const void *oldUsr = p->usr;
69
70 for(size_t ct=0; ct < sl->count; ct++) {
71 ScriptGroupRootFunc_t func;
72 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
73 mp->usr = sl->usrPtrs[ct];
74
75 mp->ptrIn = NULL;
76 mp->in = NULL;
77 mp->ptrOut = NULL;
78 mp->out = NULL;
79
80 if (sl->ins[ct]) {
81 mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
82 mp->in = mp->ptrIn;
83 if (sl->inExts[ct]) {
84 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
85 } else {
86 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
87 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
88 }
89 }
90 }
91
92 if (sl->outs[ct]) {
93 mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
94 mp->out = mp->ptrOut;
95 if (sl->outExts[ct]) {
96 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
97 } else {
98 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
99 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
100 }
101 }
102 }
103
104 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
105 func(p, xstart, xend, instep, outstep);
106 }
107 //ALOGE("script group root");
108
109 //ConvolveParams *cp = (ConvolveParams *)p->usr;
110
111 mp->usr = oldUsr;
112 }
113
114
115
execute()116 void CpuScriptGroupImpl::execute() {
117 Vector<Allocation *> ins;
118 Vector<bool> inExts;
119 Vector<Allocation *> outs;
120 Vector<bool> outExts;
121 Vector<const ScriptKernelID *> kernels;
122 bool fieldDep = false;
123
124 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
125 ScriptGroup::Node *n = mSG->mNodes[ct];
126 Script *s = n->mKernels[0]->mScript;
127
128 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
129
130 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
131 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
132 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
133 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
134 }
135 }
136
137 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
138 const ScriptKernelID *k = n->mKernels[ct2];
139 Allocation *ain = NULL;
140 Allocation *aout = NULL;
141 bool inExt = false;
142 bool outExt = false;
143
144 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
145 if (n->mInputs[ct3]->mDstKernel.get() == k) {
146 ain = n->mInputs[ct3]->mAlloc.get();
147 //ALOGE(" link in %p", ain);
148 }
149 }
150 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
151 if (mSG->mInputs[ct3]->mKernel == k) {
152 ain = mSG->mInputs[ct3]->mAlloc.get();
153 inExt = true;
154 //ALOGE(" io in %p", ain);
155 }
156 }
157
158 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
159 if (n->mOutputs[ct3]->mSource.get() == k) {
160 aout = n->mOutputs[ct3]->mAlloc.get();
161 if(n->mOutputs[ct3]->mDstField.get() != NULL) {
162 fieldDep = true;
163 }
164 //ALOGE(" link out %p", aout);
165 }
166 }
167 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
168 if (mSG->mOutputs[ct3]->mKernel == k) {
169 aout = mSG->mOutputs[ct3]->mAlloc.get();
170 outExt = true;
171 //ALOGE(" io out %p", aout);
172 }
173 }
174
175 if ((k->mHasKernelOutput == (aout != NULL)) &&
176 (k->mHasKernelInput == (ain != NULL))) {
177 ins.add(ain);
178 inExts.add(inExt);
179 outs.add(aout);
180 outExts.add(outExt);
181 kernels.add(k);
182 }
183 }
184
185 }
186
187 MTLaunchStruct mtls;
188
189 if(fieldDep) {
190 for (size_t ct=0; ct < ins.size(); ct++) {
191 Script *s = kernels[ct]->mScript;
192 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
193 uint32_t slot = kernels[ct]->mSlot;
194
195 si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
196 si->forEachKernelSetup(slot, &mtls);
197 mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
198 }
199 } else {
200 ScriptList sl;
201 sl.ins = ins.array();
202 sl.outs = outs.array();
203 sl.kernels = kernels.array();
204 sl.count = kernels.size();
205
206 Vector<const void *> usrPtrs;
207 Vector<const void *> fnPtrs;
208 Vector<uint32_t> sigs;
209 for (size_t ct=0; ct < kernels.size(); ct++) {
210 Script *s = kernels[ct]->mScript;
211 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
212
213 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
214 fnPtrs.add((void *)mtls.kernel);
215 usrPtrs.add(mtls.fep.usr);
216 sigs.add(mtls.fep.usrLen);
217 }
218 sl.sigs = sigs.array();
219 sl.usrPtrs = usrPtrs.array();
220 sl.fnPtrs = fnPtrs.array();
221 sl.inExts = inExts.array();
222 sl.outExts = outExts.array();
223
224 Script *s = kernels[0]->mScript;
225 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
226 si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
227 mtls.script = NULL;
228 mtls.kernel = (void (*)())&scriptGroupRoot;
229 mtls.fep.usr = &sl;
230 mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
231 }
232 }
233
234
235