/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------
/* Emit ARM instructions that store pixel 's' to the address held in
 * 'addr.reg'.  The access width is taken from 'addr.size' (32, 24, 16
 * or 8 bits).  If 'flags' has WRITE_BACK set, 'addr.reg' is advanced
 * past the pixel just written.
 * NOTE: in the 24-bit case 's.reg' is rotated while storing; it is
 * restored afterwards only when not marked CORRUPTIBLE.
 */
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        // word store, optional post-increment by 4 bytes
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // Store one byte at a time, rotating the low byte into place.
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // undo the two ROR #8 above so the caller's value is intact
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // half-word store, optional post-increment by 2 bytes
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        // byte store, optional post-increment by 1 byte
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

/* Emit ARM instructions that load a pixel from the address in
 * 'addr.reg' into 's.reg'.  The access width is taken from 'addr.size'
 * (32, 24, 16 or 8 bits).  If 'flags' has WRITE_BACK set, 'addr.reg'
 * is advanced past the pixel just read.
 */
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;  // scratch register used by the 24-bit path only

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        // word load, optional post-increment by 4 bytes
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // destination aliases the address register: assemble the
            // pixel in a second scratch so the address survives until
            // the last byte has been loaded
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // half-word load, optional post-increment by 2 bytes
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        // byte load, optional post-increment by 1 byte
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

/* Emit code extracting the bit-field [l, h) of register 's' into
 * 'd.reg', right-aligned.  'bits' is the total significant width of
 * the source value.  On return 'd.s' records the field width (h-l).
 * Three strategies are tried to clear the bits above 'h':
 *   - AND with an immediate mask,
 *   - BIC with the inverted mask,
 *   - otherwise shift left so the field becomes the top bits.
 */
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // mask not encodable as an ARM immediate: shift the field up
            // to bit 31 and fall through to the LSR below
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // nothing was emitted above; just copy into the destination
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

extract(integer_t & d,const pixel_t & s,int component)139 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
140 {
141     extract(d,  s.reg,
142                 s.format.c[component].h,
143                 s.format.c[component].l,
144                 s.size());
145 }
146 
extract(component_t & d,const pixel_t & s,int component)147 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
148 {
149     integer_t r(d.reg, 32, d.flags);
150     extract(r,  s.reg,
151                 s.format.c[component].h,
152                 s.format.c[component].l,
153                 s.size());
154     d = component_t(r);
155 }
156 
157 
expand(integer_t & d,const component_t & s,int dbits)158 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
159 {
160     if (s.l || (s.flags & CLEAR_HI)) {
161         extract(d, s.reg, s.h, s.l, 32);
162         expand(d, d, dbits);
163     } else {
164         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
165     }
166 }
167 
expand(component_t & d,const component_t & s,int dbits)168 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
169 {
170     integer_t r(d.reg, 32, d.flags);
171     expand(r, s, dbits);
172     d = component_t(r);
173 }
174 
/* Emit code expanding 'src' (src.size() significant bits) to 'dbits'
 * significant bits in 'dst', by replicating the source bit pattern
 * into the freed low bits.  This approximates scaling by
 * (2^dbits-1)/(2^sbits-1), e.g. for 5->8 bit color conversion.
 * 'dst' and 'src' may name the same register.
 */
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // no expansion needed; at most a register copy
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // a single bit expands to all-ones or all-zeroes
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: left-align first, then
        // fold copies of the pattern down into the remaining bits
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: double the pattern width each pass
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

downshift(pixel_t & d,int component,component_t s,const reg_t & dither)225 void GGLAssembler::downshift(
226         pixel_t& d, int component, component_t s, const reg_t& dither)
227 {
228     const needs_t& needs = mBuilderContext.needs;
229     Scratch scratches(registerFile());
230 
231     int sh = s.h;
232     int sl = s.l;
233     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
234     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
235     int sbits = sh - sl;
236 
237     int dh = d.format.c[component].h;
238     int dl = d.format.c[component].l;
239     int dbits = dh - dl;
240     int dithering = 0;
241 
242     LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
243 
244     if (sbits>dbits) {
245         // see if we need to dither
246         dithering = mDithering;
247     }
248 
249     int ireg = d.reg;
250     if (!(d.flags & FIRST)) {
251         if (s.flags & CORRUPTIBLE)  {
252             ireg = s.reg;
253         } else {
254             ireg = scratches.obtain();
255         }
256     }
257     d.flags &= ~FIRST;
258 
259     if (maskHiBits) {
260         // we need to mask the high bits (and possibly the lowbits too)
261         // and we might be able to use immediate mask.
262         if (!dithering) {
263             // we don't do this if we only have maskLoBits because we can
264             // do it more efficiently below (in the case where dl=0)
265             const int offset = sh - dbits;
266             if (dbits<=8 && offset >= 0) {
267                 const uint32_t mask = ((1<<dbits)-1) << offset;
268                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
269                     build_and_immediate(ireg, s.reg, mask, 32);
270                     sl = offset;
271                     s.reg = ireg;
272                     sbits = dbits;
273                     maskLoBits = maskHiBits = 0;
274                 }
275             }
276         } else {
277             // in the dithering case though, we need to preserve the lower bits
278             const uint32_t mask = ((1<<sbits)-1) << sl;
279             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
280                 build_and_immediate(ireg, s.reg, mask, 32);
281                 s.reg = ireg;
282                 maskLoBits = maskHiBits = 0;
283             }
284         }
285     }
286 
287     // XXX: we could special case (maskHiBits & !maskLoBits)
288     // like we do for maskLoBits below, but it happens very rarely
289     // that we have maskHiBits only and the conditions necessary to lead
290     // to better code (like doing d |= s << 24)
291 
292     if (maskHiBits) {
293         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
294         sl += 32-sh;
295         sh = 32;
296         s.reg = ireg;
297         maskHiBits = 0;
298     }
299 
300     //	Downsampling should be performed as follows:
301     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
302     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
303     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
304     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
305     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
306     //
307     //	By approximating (1>>dbits) and (1>>sbits) to 0:
308     //
309     //		V>>(sbits-dbits)	-	V>>sbits
310     //
311 	//  A good approximation is V>>(sbits-dbits),
312     //  but better one (needed for dithering) is:
313     //
314     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
315     //		(V<<dbits	-	V)>>sbits
316     //		(V	-	V>>dbits)>>(sbits-dbits)
317 
318     // Dithering is done here
319     if (dithering) {
320         comment("dithering");
321         if (sl) {
322             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
323             sh -= sl;
324             sl = 0;
325             s.reg = ireg;
326         }
327         // scaling (V-V>>dbits)
328         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
329         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
330         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
331         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
332         else                ADD(AL, 0, ireg, ireg, dither.reg);
333         s.reg = ireg;
334     }
335 
336     if ((maskLoBits|dithering) && (sh > dbits)) {
337         int shift = sh-dbits;
338         if (dl) {
339             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
340             if (ireg == d.reg) {
341                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
342             } else {
343                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
344             }
345         } else {
346             if (ireg == d.reg) {
347                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
348             } else {
349                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
350             }
351         }
352     } else {
353         int shift = sh-dh;
354         if (shift>0) {
355             if (ireg == d.reg) {
356                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
357             } else {
358                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
359             }
360         } else if (shift<0) {
361             if (ireg == d.reg) {
362                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
363             } else {
364                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
365             }
366         } else {
367             if (ireg == d.reg) {
368                 if (s.reg != d.reg) {
369                     MOV(AL, 0, d.reg, s.reg);
370                 }
371             } else {
372                 ORR(AL, 0, d.reg, d.reg, s.reg);
373             }
374         }
375     }
376 }

}; // namespace android