• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* libs/pixelflinger/codeflinger/load_store.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #include <assert.h>
19 #include <stdio.h>
20 #include <cutils/log.h>
21 #include "GGLAssembler.h"
22 
23 namespace android {
24 
25 // ----------------------------------------------------------------------------
26 
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    // Emits ARM code that stores pixel 's' at '*addr'. The pixel width is
    // taken from addr.size (32/24/16/8 bits). If WRITE_BACK is set in
    // 'flags', addr.reg is post-incremented past the stored pixel.
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        // single word store, optionally with a post-increment of 4 bytes
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // store one byte at a time, rotating the next byte down into
        // position between stores
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // caller still needs s.reg: the two ROR #8 above total ROR #16,
            // so one more ROR #16 restores the original value
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // halfword store (STRH uses the 8-bit immediate addressing mode)
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}
60 
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    // Emits ARM code that loads a pixel from '*addr' into s.reg. The pixel
    // width is taken from addr.size (32/24/16/8 bits). If WRITE_BACK is set
    // in 'flags', addr.reg is advanced past the loaded pixel.
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            // destination and address registers differ: assemble the bytes
            // directly into s.reg
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // s.reg aliases addr.reg: build the pixel in a second scratch
            // so the address isn't clobbered before the last byte is read
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // halfword load (LDRH uses the 8-bit immediate addressing mode)
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
104 
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    // Emits code extracting the bit-field [l, h) of register 's' into
    // d.reg, shifted down to bit 0. 'bits' is the total width of the
    // source value. On return d.s records the width of the field.
    const int maskLen = h-l;

#ifdef __mips__
    // the MIPS backend accepts wider fields here than the ARM one
    // (presumably reflecting its immediate-encoding limits — TODO confirm)
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        // bits exist above the field: clear them with an immediate AND/BIC
        // when the mask is encodable, otherwise shift the field's top bit
        // up to bit 31 and fall through to the LSR below
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // field was already in place; just move it into the destination
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}
141 
extract(integer_t & d,const pixel_t & s,int component)142 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
143 {
144     extract(d,  s.reg,
145                 s.format.c[component].h,
146                 s.format.c[component].l,
147                 s.size());
148 }
149 
extract(component_t & d,const pixel_t & s,int component)150 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
151 {
152     integer_t r(d.reg, 32, d.flags);
153     extract(r,  s.reg,
154                 s.format.c[component].h,
155                 s.format.c[component].l,
156                 s.size());
157     d = component_t(r);
158 }
159 
160 
expand(integer_t & d,const component_t & s,int dbits)161 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
162 {
163     if (s.l || (s.flags & CLEAR_HI)) {
164         extract(d, s.reg, s.h, s.l, 32);
165         expand(d, d, dbits);
166     } else {
167         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
168     }
169 }
170 
expand(component_t & d,const component_t & s,int dbits)171 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
172 {
173     integer_t r(d.reg, 32, d.flags);
174     expand(r, s, dbits);
175     d = component_t(r);
176 }
177 
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    // Emits code widening the src.size()-bit value in 'src' to 'dbits'
    // bits by replicating its bit pattern (so the source range
    // [0, 2^sbits-1] maps onto [0, 2^dbits-1], e.g. 5-bit color to 8-bit).
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // no expansion needed; just move the value if registers differ
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // a single bit expands to all-ones or all-zeroes
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // destination width is not a multiple of the source width:
        // left-align the value first, then fold shifted copies down
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // destination width is a multiple of the source width: replicate the
    // pattern upward, doubling the replicated width when possible
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s << sbits); note s == d after the first iteration
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
227 
downshift(pixel_t & d,int component,component_t s,const reg_t & dither)228 void GGLAssembler::downshift(
229         pixel_t& d, int component, component_t s, const reg_t& dither)
230 {
231     const needs_t& needs = mBuilderContext.needs;
232     Scratch scratches(registerFile());
233 
234     int sh = s.h;
235     int sl = s.l;
236     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
237     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
238     int sbits = sh - sl;
239 
240     int dh = d.format.c[component].h;
241     int dl = d.format.c[component].l;
242     int dbits = dh - dl;
243     int dithering = 0;
244 
245     ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
246 
247     if (sbits>dbits) {
248         // see if we need to dither
249         dithering = mDithering;
250     }
251 
252     int ireg = d.reg;
253     if (!(d.flags & FIRST)) {
254         if (s.flags & CORRUPTIBLE)  {
255             ireg = s.reg;
256         } else {
257             ireg = scratches.obtain();
258         }
259     }
260     d.flags &= ~FIRST;
261 
262     if (maskHiBits) {
263         // we need to mask the high bits (and possibly the lowbits too)
264         // and we might be able to use immediate mask.
265         if (!dithering) {
266             // we don't do this if we only have maskLoBits because we can
267             // do it more efficiently below (in the case where dl=0)
268             const int offset = sh - dbits;
269             if (dbits<=8 && offset >= 0) {
270                 const uint32_t mask = ((1<<dbits)-1) << offset;
271                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
272                     build_and_immediate(ireg, s.reg, mask, 32);
273                     sl = offset;
274                     s.reg = ireg;
275                     sbits = dbits;
276                     maskLoBits = maskHiBits = 0;
277                 }
278             }
279         } else {
280             // in the dithering case though, we need to preserve the lower bits
281             const uint32_t mask = ((1<<sbits)-1) << sl;
282             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
283                 build_and_immediate(ireg, s.reg, mask, 32);
284                 s.reg = ireg;
285                 maskLoBits = maskHiBits = 0;
286             }
287         }
288     }
289 
290     // XXX: we could special case (maskHiBits & !maskLoBits)
291     // like we do for maskLoBits below, but it happens very rarely
292     // that we have maskHiBits only and the conditions necessary to lead
293     // to better code (like doing d |= s << 24)
294 
295     if (maskHiBits) {
296         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
297         sl += 32-sh;
298         sh = 32;
299         s.reg = ireg;
300         maskHiBits = 0;
301     }
302 
303     //	Downsampling should be performed as follows:
304     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
305     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
306     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
307     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
308     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
309     //
310     //	By approximating (1>>dbits) and (1>>sbits) to 0:
311     //
312     //		V>>(sbits-dbits)	-	V>>sbits
313     //
314 	//  A good approximation is V>>(sbits-dbits),
315     //  but better one (needed for dithering) is:
316     //
317     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
318     //		(V<<dbits	-	V)>>sbits
319     //		(V	-	V>>dbits)>>(sbits-dbits)
320 
321     // Dithering is done here
322     if (dithering) {
323         comment("dithering");
324         if (sl) {
325             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
326             sh -= sl;
327             sl = 0;
328             s.reg = ireg;
329         }
330         // scaling (V-V>>dbits)
331         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
332         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
333         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
334         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
335         else                ADD(AL, 0, ireg, ireg, dither.reg);
336         s.reg = ireg;
337     }
338 
339     if ((maskLoBits|dithering) && (sh > dbits)) {
340         int shift = sh-dbits;
341         if (dl) {
342             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
343             if (ireg == d.reg) {
344                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
345             } else {
346                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
347             }
348         } else {
349             if (ireg == d.reg) {
350                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
351             } else {
352                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
353             }
354         }
355     } else {
356         int shift = sh-dh;
357         if (shift>0) {
358             if (ireg == d.reg) {
359                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
360             } else {
361                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
362             }
363         } else if (shift<0) {
364             if (ireg == d.reg) {
365                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
366             } else {
367                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
368             }
369         } else {
370             if (ireg == d.reg) {
371                 if (s.reg != d.reg) {
372                     MOV(AL, 0, d.reg, s.reg);
373                 }
374             } else {
375                 ORR(AL, 0, d.reg, d.reg, s.reg);
376             }
377         }
378     }
379 }
380 
381 }; // namespace android
382