/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
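    // WRITE_BACK asks us to advance addr.reg past the pixel just written,
    // either with a post-indexed store or, for the 3-byte case, an explicit ADD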
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
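        // the two ROR #8 above leave s.reg rotated by 16; rotate it back
        // below if the caller still needs the original value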
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
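            // s.reg aliases addr.reg: assemble the pixel in scratch registers
            // and overwrite s.reg only with the final ORR so the address
            // stays valid for all three byte loads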
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

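// Extracts the bit field [l, h) of the packed value in register 's' into 'd',
// i.e. d = (s >> l) & ((1<<(h-l))-1). 'bits' is the total width of the packed
// value, which lets the full-width cases skip the masking. For example, a
// 6-bit field with l=5, h=11 (green in RGB 565) comes out as (packed >> 5) & 0x3F.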
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
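            // the mask cannot be encoded as an immediate operand: shift the
            // field up to bit 31 to drop the high bits, then adjust l/h so
            // the LSR below extracts it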
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

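// Widens the sbits-wide value in 'src' to 'dbits' bits by replicating its bit
// pattern, which closely approximates v * ((1<<dbits)-1) / ((1<<sbits)-1).
// For example, expanding the 3-bit value 0b101 to 8 bits gives 0b10110110
// (182, versus the exact 5*255/7 = 182.1). A 1-bit source is special-cased
// to produce either 0 or all ones.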
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

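// Narrows component 's' (live in bits [s.l, s.h) of s.reg) to the width of
// component 'component' of the destination format and merges it into d.reg
// at the position the format requires. The first component written to the
// pixel overwrites d.reg (FIRST flag); later components are ORed in. When
// the source has more precision than the destination and dithering is
// enabled, the value in 'dither' is added in before the final truncation.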
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE)  {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too)
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but we very rarely have maskHiBits
    // alone together with the conditions needed to produce better code
    // (like doing d |= s << 24)

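    // fall back to clearing the high bits by shifting the field up to the
    // top of the register; the final right shift below brings it back down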
    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //  Downsampling should be performed as follows:
    //      V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //      V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //      V * [1/((1<<sbits)-1)>>dbits  -  1/((1<<sbits)-1)]
    //      V/((1<<(sbits-dbits))-(1>>dbits))  -  (V>>sbits)/((1<<sbits)-1)>>sbits
    //      V/((1<<(sbits-dbits))-(1>>dbits))  -  (V>>sbits)/(1-(1>>sbits))
    //
    //  By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits)  -  V>>sbits
    //
    //  A good approximation is V>>(sbits-dbits),
    //  but a better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits  -  V)>>sbits
    //      (V<<dbits  -  V)>>sbits
    //      (V  -  V>>dbits)>>(sbits-dbits)
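    //
    //  e.g. with sbits=8, dbits=5: V=0x80 gives 0x80>>3 = 16 with the simple
    //  form and (0x80 - 0x80>>5)>>3 = 15 with the refined one, while the
    //  exact value is 0x80*31/255 ≈ 15.6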

    // Dithering is done here
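    // (V - V>>dbits) computes the scaled value derived above; the dither
    // value is then shifted so that its GGL_DITHER_BITS line up with the
    // (sbits-dbits) fractional bits discarded by the final right shift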
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android