/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdio.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
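        // Each STRB stores the low byte; ROR #8 then rotates the next
        // component into the low byte. After the three stores the register
        // holds the original value rotated right by 16, so a final ROR #16
        // restores it when the source must be preserved.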
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
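            // the destination aliases the base pointer, so assemble the
            // result in scratch registers to keep addr.reg intact until
            // the final ORR writes s.reg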
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

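// Extract the bit field [h-1..l] of 's' into d.reg; for example the green
// component of an RGB565 pixel (h=11, l=5) comes out as (s >> 5) & 0x3F.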
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
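            // the mask cannot be encoded as an ARM immediate in either
            // polarity; shift the field against the top of the register
            // so the unwanted high bits fall off instead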
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

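// Widen 'src' (src.size() bits) to 'dbits' by bit replication, so the source
// range maps onto the full destination range; e.g. a 5-bit value v becomes
// the 8-bit value (v<<3)|(v>>2), taking 0 to 0 and 31 to 255.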
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
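        // a single bit replicates to all-ones or all-zeros:
        // 1 -> (1<<dbits)-1, 0 -> 0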
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

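    // dbits is an exact multiple of sbits: replicate by ORing shifted
    // copies, e.g. 4 -> 8 bits gives d = s | (s<<4), so 0xF becomes 0xFF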
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

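// Narrow component 's' to the destination format and merge it into the
// pixel register at bits [dh-1..dl], optionally adding the dither offset
// before the final shift.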
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too)
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special-case (maskHiBits && !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only together with the conditions needed
    // to produce better code (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    // V * ((1<<dbits)-1) / ((1<<sbits)-1)
    // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    // V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    // (V>>(sbits-dbits)<<sbits - V)>>sbits
    // (V<<dbits - V)>>sbits
    // (V - V>>dbits)>>(sbits-dbits)
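    // e.g. for 8 -> 5 bits this is (V - (V>>5)) >> 3, which still maps
    // 255 to 31 but leaves fractional bits available so the dither offset
    // can be added before the final shift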

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android