• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* libs/pixelflinger/codeflinger/load_store.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #include <assert.h>
19 #include <stdio.h>
20 #include <cutils/log.h>
21 #include "GGLAssembler.h"
22 
23 #ifdef __ARM_ARCH__
24 #include <machine/cpu-features.h>
25 #endif
26 
27 namespace android {
28 
29 // ----------------------------------------------------------------------------
30 
store(const pointer_t & addr,const pixel_t & s,uint32_t flags)31 void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32 {
33     const int bits = addr.size;
34     const int inc = (flags & WRITE_BACK)?1:0;
35     switch (bits) {
36     case 32:
37         if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
38         else        STR(AL, s.reg, addr.reg);
39         break;
40     case 24:
41         // 24 bits formats are a little special and used only for RGB
42         // 0x00BBGGRR is unpacked as R,G,B
43         STRB(AL, s.reg, addr.reg, immed12_pre(0));
44         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45         STRB(AL, s.reg, addr.reg, immed12_pre(1));
46         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47         STRB(AL, s.reg, addr.reg, immed12_pre(2));
48         if (!(s.flags & CORRUPTIBLE)) {
49             MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50         }
51         if (inc)
52             ADD(AL, 0, addr.reg, addr.reg, imm(3));
53         break;
54     case 16:
55         if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
56         else        STRH(AL, s.reg, addr.reg);
57         break;
58     case  8:
59         if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
60         else        STRB(AL, s.reg, addr.reg);
61         break;
62     }
63 }
64 
load(const pointer_t & addr,const pixel_t & s,uint32_t flags)65 void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66 {
67     Scratch scratches(registerFile());
68     int s0;
69 
70     const int bits = addr.size;
71     const int inc = (flags & WRITE_BACK)?1:0;
72     switch (bits) {
73     case 32:
74         if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
75         else        LDR(AL, s.reg, addr.reg);
76         break;
77     case 24:
78         // 24 bits formats are a little special and used only for RGB
79         // R,G,B is packed as 0x00BBGGRR
80         s0 = scratches.obtain();
81         if (s.reg != addr.reg) {
82             LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
83             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
84             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
86             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87         } else {
88             int s1 = scratches.obtain();
89             LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
90             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
91             ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
93             ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94         }
95         if (inc)
96             ADD(AL, 0, addr.reg, addr.reg, imm(3));
97         break;
98     case 16:
99         if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
100         else        LDRH(AL, s.reg, addr.reg);
101         break;
102     case  8:
103         if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
104         else        LDRB(AL, s.reg, addr.reg);
105         break;
106     }
107 }
108 
// Extract the bit field [l, h) from the packed value in register 's' into
// d.reg, right-justified. 'bits' is the total width of the packed value.
// On return, d.s records the extracted field's width (h-l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // maximum supported component width for this code path
#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

#if __ARM_ARCH__ >= 7
    // ARMv7+: prefer a single MOV/AND/BIC when the field is already
    // aligned, otherwise fall back to the UBFX bit-field instruction
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    // pre-ARMv7: no UBFX available; mask (or shift up) then shift down
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // mask is not encodable as an ARM immediate: push the field
            // up against bit 31 so the LSR below isolates it instead
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
160 
extract(integer_t & d,const pixel_t & s,int component)161 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
162 {
163     extract(d,  s.reg,
164                 s.format.c[component].h,
165                 s.format.c[component].l,
166                 s.size());
167 }
168 
extract(component_t & d,const pixel_t & s,int component)169 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
170 {
171     integer_t r(d.reg, 32, d.flags);
172     extract(r,  s.reg,
173                 s.format.c[component].h,
174                 s.format.c[component].l,
175                 s.size());
176     d = component_t(r);
177 }
178 
179 
expand(integer_t & d,const component_t & s,int dbits)180 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
181 {
182     if (s.l || (s.flags & CLEAR_HI)) {
183         extract(d, s.reg, s.h, s.l, 32);
184         expand(d, d, dbits);
185     } else {
186         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
187     }
188 }
189 
expand(component_t & d,const component_t & s,int dbits)190 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
191 {
192     integer_t r(d.reg, 32, d.flags);
193     expand(r, s, dbits);
194     d = component_t(r);
195 }
196 
// Expand 'src' (src.size() bits of precision) into 'dst' with 'dbits' bits
// of precision by replicating the source bits into the lower-order
// positions (approximating src * ((1<<dbits)-1) / ((1<<sbits)-1)).
// 'dst' and 'src' may refer to the same register.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    // already wide enough: at most a register copy is needed
    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    // a single source bit expands to all-ones or all-zeroes
    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    // dbits is not a multiple of sbits: shift the source all the way up,
    // then fill downward by or-ing in right-shifted copies
    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;  // each pass doubles the number of valid bits
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: fill upward with left-shifted copies
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s << sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;  // widen the stride while another full copy fits
        }
    } while(dbits>0);
}
246 
// Narrow component 's' (live in bit range [s.l, s.h) of s.reg) to the
// width of the destination format's 'component' field and merge it into
// the pixel register d.reg at the field's position. When precision is
// reduced and dithering is enabled, dither.reg is added before the
// truncating shift. d.flags' FIRST bit selects MOV vs ORR into d.reg.
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;  // note: not referenced below
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    // whether bits above sh / below sl hold garbage that must be cleared
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;    // source component width

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;    // destination component width
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    // choose an intermediate register: d.reg for the first component
    // written, s.reg if it may be clobbered, otherwise a scratch
    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE)  {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the lowbits too)
        // and we might be able to use immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        // no immediate mask applied above: clear the high bits by
        // shifting the component up against bit 31
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //  Downsampling should be performed as follows:
    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //  V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //  V * [1/((1<<sbits)-1)>>dbits    -  1/((1<<sbits)-1)]
    //  V/((1<<(sbits-dbits))-(1>>dbits))  -  (V>>sbits)/((1<<sbits)-1)>>sbits
    //  V/((1<<(sbits-dbits))-(1>>dbits))  -  (V>>sbits)/(1-(1>>sbits))
    //
    //  By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits)  -  V>>sbits
    //
    //  A good approximation is V>>(sbits-dbits),
    //  but better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits  -  V)>>sbits
    //      (V<<dbits  -  V)>>sbits
    //      (V  -  V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            // right-justify the component first
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // align the dither value with the bits about to be discarded
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    // finally shift the component to its destination position and merge
    // it into the pixel register (MOV when ireg==d.reg, ORR otherwise)
    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}
399 
400 }; // namespace android
401