/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc) STR(AL, s.reg, addr.reg, immed12_post(4));
        else     STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
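        // For example, with s.reg = 0x00BBGGRR: STRB stores RR at +0,
        // ROR #8 yields 0xRR00BBGG so GG is stored at +1, another ROR #8
        // yields 0xGGRR00BB so BB is stored at +2, and the final ROR #16
        // restores 0x00BBGGRR when 's' must be preserved.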
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2));
        else     STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1));
        else     STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
        else     LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
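        // The three bytes at offsets 0,1,2 are combined with shifted ORRs
        // so that s.reg = R | (G<<8) | (B<<16).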
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else     LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else     LDRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
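    // Isolate bits [l, h) of 's' into 'd'. For instance, extracting the
    // green component of an RGB565 pixel uses h=11, l=5: AND with the
    // mask 0x07E0, then LSR #5, leaving a 6-bit value in 'd'.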
    const int maskLen = h-l;

#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
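            // i.e. a 1-bit source expands to 0 or the all-ones value
            // (1<<dbits)-1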
        return;
    }
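    // Both paths below widen by replicating the source bits downward,
    // approximating d = s * ((1<<dbits)-1) / ((1<<sbits)-1). For example,
    // expanding a 5-bit component to 8 bits produces d = (s<<3) | (s>>2):
    // s = 0b10110 (22) becomes 0b10110101 (181), close to 22*255/31.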
    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too)
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //      V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //      V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    //      V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits - V)>>sbits
    //      (V<<dbits - V)>>sbits
    //      (V - V>>dbits)>>(sbits-dbits)
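    //
    // For instance, quantizing an 8-bit V down to 5 bits computes
    //      (V - V>>5) >> 3
    // where V=255 gives (255-7)>>3 = 31: full scale maps to full scale,
    // and the dither offset is added in before the final right shift.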
    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

} // namespace android