/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

namespace android {

// ----------------------------------------------------------------------------

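// Generates code that writes the pixel in s.reg to the memory pointed to
// by addr.reg. addr.size selects the format width (32/24/16/8 bits); when
// WRITE_BACK is set in flags, addr.reg is advanced past the stored pixel.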
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and are used only for RGB:
        // 0x00BBGGRR is unpacked as R,G,B
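        // Store R from the low byte, then rotate the next component into
        // the low byte before each subsequent byte store. The final ROR
        // by 16 undoes the two rotations and restores s.reg when the
        // caller still needs it (i.e. when it is not CORRUPTIBLE).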
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

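// Generates code that reads a pixel of addr.size bits from the memory
// pointed to by addr.reg into s.reg; when WRITE_BACK is set in flags,
// addr.reg is advanced past the pixel that was read.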
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and are used only for RGB:
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
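            // s.reg aliases addr.reg: assemble the pixel in scratch
            // registers so addr.reg stays valid for all three byte
            // loads, and write s.reg only with the final ORR.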
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

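// Extracts the bit field [l, h) of the packed value in register s into
// d.reg, where 'bits' is the total width of the packed value. The field
// may be at most 8 bits wide (maskLen <= 8).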
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
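    // ARMv7: use a single MOV/AND/BIC when the field is at one end of the
    // word or the mask is an encodable immediate; otherwise fall back to
    // UBFX (unsigned bit-field extract), which handles the general case
    // in one instruction.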
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
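    // Pre-ARMv7: mask the field with AND/BIC when the mask is an encodable
    // immediate; otherwise shift the field up to the top of the word to
    // clear the high bits, then shift it back down into place.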
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


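// Expands the component in s (s.size() bits of precision) to dbits bits
// of precision in d by replicating the source bits, which maps the source
// range [0, 2^sbits-1] onto the full destination range [0, 2^dbits-1]
// (e.g. 5-bit 0b10110 expands to 8-bit 0b10110101).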
void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

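    // A 1-bit source expands to all-zeroes or all-ones:
    // (s << dbits) - s  ==  s ? (1<<dbits)-1 : 0, computed in a single RSB.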
    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }

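    // dbits is not a multiple of sbits: left-justify the field within the
    // dbits window, then OR in shifted-down copies, doubling the width of
    // the replicated pattern on each pass.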
    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

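    // dbits is an exact multiple of sbits: replicate by ORing the value
    // with a left-shifted copy of itself, widening the step as the
    // replicated field grows.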
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d = s | (s<<sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

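// Narrows the component in s down to the dbits of precision that the
// destination pixel format allocates to 'component', optionally applying
// dithering from dither.reg, and merges the result into d.reg.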
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too),
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special-case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //   V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //   V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    //   V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //   V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //   (V>>(sbits-dbits)<<sbits - V)>>sbits
    //   (V<<dbits - V)>>sbits
    //   (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
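    // Add the dither value before truncation, scaled so that its
    // GGL_DITHER_BITS of precision line up with the (sbits-dbits) bits
    // that are about to be discarded.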
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

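    // Finally shift the component down into its position [dl, dh) in the
    // destination pixel: MOV when writing the first component of d.reg,
    // ORR to merge each subsequent one.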
    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android