/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK) ? 1 : 0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB;
        // 0x00BBGGRR is unpacked as R,G,B
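        // Store R at +0, then rotate the next byte into the low 8 bits
        // and store G at +1 and B at +2. The final ROR #16 completes a
        // full 32-bit rotation, restoring s.reg when the caller still
        // needs its original value.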
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK) ? 1 : 0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB;
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
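            // s.reg aliases addr.reg here: assemble the pixel in scratch
            // registers first, so the address register isn't clobbered
            // before all three bytes have been read.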
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

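// Extract the bit-field [l..h) of 's' into 'd'; the component value is
// (packed & (((1<<(h-l))-1) << l)) >> l. For example, the green component
// of an RGB565 pixel is the 6-bit field with h=11, l=5.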
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

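// Expand an sbits-wide value to dbits (dbits > sbits) by replicating its
// bit pattern, so that all-zeroes maps to all-zeroes and all-ones maps to
// all-ones; e.g. a 5-bit component v expands to the 8-bit (v<<3) | (v>>2).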
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits <= sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
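        // (a 1-bit source expands to either all zeroes or all ones)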
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while (dbits > 0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d = s | (s << sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while (dbits > 0);
}

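// Convert component 's' (the bit-field [s.l..s.h) of a source pixel) to the
// width of component 'component' of the destination format 'd', optionally
// dithering when precision is lost, then OR the result into place (or MOV
// it when this is the first component written).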
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too),
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits, because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special-case (maskHiBits && !maskLoBits) like we do
    // for maskLoBits below, but it is very rare that we have maskHiBits
    // only together with the conditions needed to produce better code
    // (like doing d |= s << 24).

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //   V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //   V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    //   V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //   V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //   (V>>(sbits-dbits)<<sbits - V)>>sbits
    //   (V<<dbits - V)>>sbits
    //   (V - V>>dbits)>>(sbits-dbits)
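    //
    // For example, with sbits=8 and dbits=5 the value finally stored is
    //   (V - (V>>5) + dither') >> 3
    // where dither' is the dither value pre-shifted to the source scale
    // (by GGL_DITHER_BITS - (sbits-dbits), as below).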

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL, -shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android