/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdio.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

// Emits ARM code that stores the pixel value held in register s.reg to
// the address in addr.reg, using a store sized by addr.size (8, 16, 24
// or 32 bits). If WRITE_BACK is set in 'flags', addr.reg is advanced
// past the stored pixel (post-increment / explicit ADD for 24-bit).
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        // single word store, post-incrementing by 4 when writing back
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // Store byte-by-byte, rotating the source right by 8 between
        // stores to bring the next component into the low byte.
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // s.reg has been rotated by 16 so far; rotate by 16 more to
            // restore its original value since the caller still needs it
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // halfword store; STRH only encodes an 8-bit immediate offset
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}
64 
// Emits ARM code that loads a pixel of addr.size bits (8, 16, 24 or 32)
// from the address in addr.reg into register s.reg. If WRITE_BACK is
// set in 'flags', addr.reg is advanced past the loaded pixel.
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        // Load the three bytes separately and OR them into place.
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            // destination and address registers are distinct, so we can
            // accumulate directly into s.reg
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // destination aliases the address register: accumulate in a
            // second scratch and only overwrite s.reg (== addr.reg) on
            // the final ORR, after the last load has used the address
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // halfword load; LDRH only encodes an 8-bit immediate offset
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
108 
// Emits code extracting the bitfield [l, h) of register 's' (holding a
// 'bits'-wide packed value) into d.reg, right-aligned. On return,
// d.s records the extracted field width (h - l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // the mask-building strategies below rely on the field being
    // narrow enough to encode as an immediate (wider on MIPS)
#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        // the field does not reach the top of the value: clear the bits
        // above it, preferring a single AND/BIC with an immediate mask
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // no encodable immediate: shift the field up to bit 31 so
            // the bits above it fall off, and adjust l/h accordingly
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        // right-align the field
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // field was already aligned and masked; just move it into place
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}
145 
extract(integer_t & d,const pixel_t & s,int component)146 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
147 {
148     extract(d,  s.reg,
149                 s.format.c[component].h,
150                 s.format.c[component].l,
151                 s.size());
152 }
153 
extract(component_t & d,const pixel_t & s,int component)154 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
155 {
156     integer_t r(d.reg, 32, d.flags);
157     extract(r,  s.reg,
158                 s.format.c[component].h,
159                 s.format.c[component].l,
160                 s.size());
161     d = component_t(r);
162 }
163 
164 
expand(integer_t & d,const component_t & s,int dbits)165 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
166 {
167     if (s.l || (s.flags & CLEAR_HI)) {
168         extract(d, s.reg, s.h, s.l, 32);
169         expand(d, d, dbits);
170     } else {
171         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
172     }
173 }
174 
expand(component_t & d,const component_t & s,int dbits)175 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
176 {
177     integer_t r(d.reg, 32, d.flags);
178     expand(r, s, dbits);
179     d = component_t(r);
180 }
181 
// Emits code expanding 'src' (src.size() bits) to 'dbits' bits in
// 'dst' by replicating the source bits downward — the standard
// approximation of v * ((1<<dbits)-1) / ((1<<sbits)-1).
// dst and src may refer to the same register.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // already wide enough; just move if the registers differ
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // a single bit expands to all-ones or all-zeroes
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: shift the source to the top
        // of the destination, then OR in shifted copies to fill the rest
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;   // each pass doubles the replicated width
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: double the value's width each pass
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s<<sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
231 
downshift(pixel_t & d,int component,component_t s,const reg_t & dither)232 void GGLAssembler::downshift(
233         pixel_t& d, int component, component_t s, const reg_t& dither)
234 {
235     const needs_t& needs = mBuilderContext.needs;
236     Scratch scratches(registerFile());
237 
238     int sh = s.h;
239     int sl = s.l;
240     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
241     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
242     int sbits = sh - sl;
243 
244     int dh = d.format.c[component].h;
245     int dl = d.format.c[component].l;
246     int dbits = dh - dl;
247     int dithering = 0;
248 
249     ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
250 
251     if (sbits>dbits) {
252         // see if we need to dither
253         dithering = mDithering;
254     }
255 
256     int ireg = d.reg;
257     if (!(d.flags & FIRST)) {
258         if (s.flags & CORRUPTIBLE)  {
259             ireg = s.reg;
260         } else {
261             ireg = scratches.obtain();
262         }
263     }
264     d.flags &= ~FIRST;
265 
266     if (maskHiBits) {
267         // we need to mask the high bits (and possibly the lowbits too)
268         // and we might be able to use immediate mask.
269         if (!dithering) {
270             // we don't do this if we only have maskLoBits because we can
271             // do it more efficiently below (in the case where dl=0)
272             const int offset = sh - dbits;
273             if (dbits<=8 && offset >= 0) {
274                 const uint32_t mask = ((1<<dbits)-1) << offset;
275                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
276                     build_and_immediate(ireg, s.reg, mask, 32);
277                     sl = offset;
278                     s.reg = ireg;
279                     sbits = dbits;
280                     maskLoBits = maskHiBits = 0;
281                 }
282             }
283         } else {
284             // in the dithering case though, we need to preserve the lower bits
285             const uint32_t mask = ((1<<sbits)-1) << sl;
286             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
287                 build_and_immediate(ireg, s.reg, mask, 32);
288                 s.reg = ireg;
289                 maskLoBits = maskHiBits = 0;
290             }
291         }
292     }
293 
294     // XXX: we could special case (maskHiBits & !maskLoBits)
295     // like we do for maskLoBits below, but it happens very rarely
296     // that we have maskHiBits only and the conditions necessary to lead
297     // to better code (like doing d |= s << 24)
298 
299     if (maskHiBits) {
300         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
301         sl += 32-sh;
302         sh = 32;
303         s.reg = ireg;
304         maskHiBits = 0;
305     }
306 
307     //	Downsampling should be performed as follows:
308     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
309     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
310     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
311     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
312     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
313     //
314     //	By approximating (1>>dbits) and (1>>sbits) to 0:
315     //
316     //		V>>(sbits-dbits)	-	V>>sbits
317     //
318 	//  A good approximation is V>>(sbits-dbits),
319     //  but better one (needed for dithering) is:
320     //
321     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
322     //		(V<<dbits	-	V)>>sbits
323     //		(V	-	V>>dbits)>>(sbits-dbits)
324 
325     // Dithering is done here
326     if (dithering) {
327         comment("dithering");
328         if (sl) {
329             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
330             sh -= sl;
331             sl = 0;
332             s.reg = ireg;
333         }
334         // scaling (V-V>>dbits)
335         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
336         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
337         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
338         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
339         else                ADD(AL, 0, ireg, ireg, dither.reg);
340         s.reg = ireg;
341     }
342 
343     if ((maskLoBits|dithering) && (sh > dbits)) {
344         int shift = sh-dbits;
345         if (dl) {
346             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
347             if (ireg == d.reg) {
348                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
349             } else {
350                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
351             }
352         } else {
353             if (ireg == d.reg) {
354                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
355             } else {
356                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
357             }
358         }
359     } else {
360         int shift = sh-dh;
361         if (shift>0) {
362             if (ireg == d.reg) {
363                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
364             } else {
365                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
366             }
367         } else if (shift<0) {
368             if (ireg == d.reg) {
369                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
370             } else {
371                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
372             }
373         } else {
374             if (ireg == d.reg) {
375                 if (s.reg != d.reg) {
376                     MOV(AL, 0, d.reg, s.reg);
377                 }
378             } else {
379                 ORR(AL, 0, d.reg, d.reg, s.reg);
380             }
381         }
382     }
383 }

}; // namespace android