• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 using namespace android;
22 using namespace android::renderscript;
23 
24 namespace android {
25 namespace renderscript {
26 
27 
28 class RsdCpuScriptIntrinsic3DLUT : public RsdCpuScriptIntrinsic {
29 public:
30     virtual void populateScript(Script *);
31     virtual void invokeFreeChildren();
32 
33     virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34 
35     virtual ~RsdCpuScriptIntrinsic3DLUT();
36     RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
37 
38 protected:
39     ObjectBaseRef<Allocation> mLUT;
40 
41     static void kernel(const RsForEachStubParamStruct *p,
42                        uint32_t xstart, uint32_t xend,
43                        uint32_t instep, uint32_t outstep);
44 };
45 
46 }
47 }
48 
49 
setGlobalObj(uint32_t slot,ObjectBase * data)50 void RsdCpuScriptIntrinsic3DLUT::setGlobalObj(uint32_t slot, ObjectBase *data) {
51     rsAssert(slot == 0);
52     mLUT.set(static_cast<Allocation *>(data));
53 }
54 
/**
 * Externally implemented (C linkage) routine that processes a whole span of
 * pixels; only its prototype is visible in this file.
 *
 * As called from kernel():
 *   dst / in         - output and input pixel pointers for the span
 *   count            - number of pixels in the span
 *   lut              - base pointer of the LUT data
 *   pitchy / pitchz  - byte strides between LUT rows and LUT slices
 *   dimx/dimy/dimz   - the LUT dimensions minus one
 */
extern "C" void rsdIntrinsic3DLUT_K(void *dst, const void *in, size_t count,
                                    const void *lut,
                                    int32_t pitchy, int32_t pitchz,
                                    int dimx, int dimy, int dimz);
59 
60 
kernel(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)61 void RsdCpuScriptIntrinsic3DLUT::kernel(const RsForEachStubParamStruct *p,
62                                       uint32_t xstart, uint32_t xend,
63                                       uint32_t instep, uint32_t outstep) {
64     RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
65 
66     uchar4 *out = (uchar4 *)p->out;
67     uchar4 *in = (uchar4 *)p->in;
68     uint32_t x1 = xstart;
69     uint32_t x2 = xend;
70 
71     const uchar *bp = (const uchar *)cp->mLUT->mHal.drvState.lod[0].mallocPtr;
72 
73     int4 dims = {
74         static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimX - 1),
75         static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimY - 1),
76         static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimZ - 1),
77         -1
78     };
79     const float4 m = (float4)(1.f / 255.f) * convert_float4(dims);
80     const int4 coordMul = convert_int4(m * (float4)0x8000);
81     const size_t stride_y = cp->mLUT->mHal.drvState.lod[0].stride;
82     const size_t stride_z = stride_y * cp->mLUT->mHal.drvState.lod[0].dimY;
83 
84     //ALOGE("strides %zu %zu", stride_y, stride_z);
85 
86 #if defined(ARCH_ARM_USE_INTRINSICS)
87     if (gArchUseSIMD) {
88         int32_t len = x2 - x1;
89         if(len > 0) {
90             rsdIntrinsic3DLUT_K(out, in, len,
91                                 bp, stride_y, stride_z,
92                                 dims.x, dims.y, dims.z);
93             x1 += len;
94             out += len;
95             in += len;
96         }
97     }
98 #endif
99 
100     while (x1 < x2) {
101         int4 baseCoord = convert_int4(*in) * coordMul;
102         int4 coord1 = baseCoord >> (int4)15;
103         //int4 coord2 = min(coord1 + 1, gDims - 1);
104 
105         int4 weight2 = baseCoord & 0x7fff;
106         int4 weight1 = (int4)0x8000 - weight2;
107 
108         //ALOGE("coord1      %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
109         const uchar *bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z);
110         const uchar4 *pt_00 = (const uchar4 *)&bp2[0];
111         const uchar4 *pt_10 = (const uchar4 *)&bp2[stride_y];
112         const uchar4 *pt_01 = (const uchar4 *)&bp2[stride_z];
113         const uchar4 *pt_11 = (const uchar4 *)&bp2[stride_y + stride_z];
114 
115         uint4 v000 = convert_uint4(pt_00[0]);
116         uint4 v100 = convert_uint4(pt_00[1]);
117         uint4 v010 = convert_uint4(pt_10[0]);
118         uint4 v110 = convert_uint4(pt_10[1]);
119         uint4 v001 = convert_uint4(pt_01[0]);
120         uint4 v101 = convert_uint4(pt_01[1]);
121         uint4 v011 = convert_uint4(pt_11[0]);
122         uint4 v111 = convert_uint4(pt_11[1]);
123 
124         uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7;
125         uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7;
126         uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7;
127         uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7;
128 
129         uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15;
130         uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15;
131 
132         uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15;
133         uint4 v2 = (v + 0x7f) >> (int4)8;
134 
135         uchar4 ret = convert_uchar4(v2);
136         ret.w = in->w;
137 
138         #if 0
139         if (!x1) {
140             ALOGE("in          %08x %08x %08x %08x", in->r, in->g, in->b, in->a);
141             ALOGE("baseCoord   %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, baseCoord.w);
142             ALOGE("coord1      %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
143             ALOGE("weight1     %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w);
144             ALOGE("weight2     %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w);
145 
146             ALOGE("v000        %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w);
147             ALOGE("v100        %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w);
148             ALOGE("yz00        %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w);
149             ALOGE("z0          %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w);
150 
151             ALOGE("v           %08x %08x %08x %08x", v.x, v.y, v.z, v.w);
152             ALOGE("v2          %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w);
153         }
154         #endif
155         *out = ret;
156 
157 
158         in++;
159         out++;
160         x1++;
161     }
162 }
163 
RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)164 RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx,
165                                                      const Script *s, const Element *e)
166             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
167 
168     mRootPtr = &kernel;
169 }
170 
~RsdCpuScriptIntrinsic3DLUT()171 RsdCpuScriptIntrinsic3DLUT::~RsdCpuScriptIntrinsic3DLUT() {
172 }
173 
populateScript(Script * s)174 void RsdCpuScriptIntrinsic3DLUT::populateScript(Script *s) {
175     s->mHal.info.exportedVariableCount = 1;
176 }
177 
invokeFreeChildren()178 void RsdCpuScriptIntrinsic3DLUT::invokeFreeChildren() {
179     mLUT.clear();
180 }
181 
182 
rsdIntrinsic_3DLUT(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)183 RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
184                                     const Script *s, const Element *e) {
185 
186     return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e);
187 }
188 
189 
190